/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also, the "unordered" FP comparison is implemented as a normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least significant
     mantissa bit is sometimes incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest, with no
     special handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  The required "round to odd" mode is not implemented;
     this implementation just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 are allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
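
/* Illustrative sketch (added; not part of the decoder's logic): client
   code, for example via the macros in valgrind.h, would emit the
   preamble plus a request marker roughly as follows.  The rotation
   amounts 3+13+51+61 sum to 128, i.e. two full 64-bit rotations, so
   x12 is unchanged and the whole sequence is a no-op when run
   natively.

      __asm__ volatile(
         "ror x12, x12, #3  \n\t"   // 93CC0D8C
         "ror x12, x12, #13 \n\t"   // 93CC358C
         "ror x12, x12, #51 \n\t"   // 93CCCD8C
         "ror x12, x12, #61 \n\t"   // 93CCF58C -- end of preamble
         "orr x10, x10, x10 \n\t"   // AA0A014A: X3 = client_request(X4)
         : : : "cc", "memory"
      );
*/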

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"

/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
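
/* Usage example (added for clarity; dd and nn stand for hypothetical
   decoded register fields, and the register-namer helpers are defined
   later in this file):
      DIP("add %s, %s\n", nameIReg64orZR(dd), nameIReg64orZR(nn));
   prints its text only when front-end tracing (VEX_TRACE_FE) is
   enabled, and is a no-op otherwise. */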


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
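
/* Worked example (added for clarity): for bytes p[0..3]
   = { 0x78, 0x56, 0x34, 0x12 } the result is 0x12345678, independent
   of the host's endianness. */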

/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   x <<= (64-n);
   Long r = (Long)x;
   r >>= (64-n);
   return (ULong)r;
}
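
/* Worked example (added for clarity): sx_to_64(0x2A, 6) treats 0x2A
   (binary 101010) as a 6-bit signed value, i.e. -22, and returns
   0xFFFFFFFFFFFFFFEA. */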

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)
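
/* Examples (added for clarity): BITS4(1,0,1,1) == 0xB and
   BITS8(1,0,0,0,0,0,1,1) == 0x83.  These macros let instruction-field
   constants below be written in a readable bit-by-bit form. */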

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
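
/* Worked example (added for clarity): SLICE_UInt(0xDEADBEEF, 15, 8)
   == 0xBE, i.e. bits [15:8] of the value. */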


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
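
/* Worked example (added for clarity): for ty == Ity_I32, imm == 1 and
   arg holding 0x80000001, the generated IR computes
   (0x80000001 << 31) | (0x80000001 >> 1) == 0xC0000000. */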

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
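
/* Worked example (added for clarity): for ty == Ity_I32 and imm == 5,
   an arg of 0x20 (bit 5 set) yields 0xFFFFFFFF, while any arg with
   bit 5 clear yields 0. */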

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)

#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on.  (Note, though, that the 4-byte case currently returns Ity_I32,
   with Ity_F32 left commented out.) */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
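
/* Worked example (added for clarity): offsetQRegLane(5, Ity_I32, 1)
   is the offset of bytes [7:4] of q5, i.e.
   offsetQReg128(5) + 4, on this little-endian-only path. */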

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}

1571 
1572 //ZZ /* ---------------- Misc registers ---------------- */
1573 //ZZ
1574 //ZZ static void putMiscReg32 ( UInt    gsoffset,
1575 //ZZ                            IRExpr* e, /* :: Ity_I32 */
1576 //ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
1577 //ZZ {
1578 //ZZ    switch (gsoffset) {
1579 //ZZ       case OFFB_FPSCR:   break;
1580 //ZZ       case OFFB_QFLAG32: break;
1581 //ZZ       case OFFB_GEFLAG0: break;
1582 //ZZ       case OFFB_GEFLAG1: break;
1583 //ZZ       case OFFB_GEFLAG2: break;
1584 //ZZ       case OFFB_GEFLAG3: break;
1585 //ZZ       default: vassert(0); /* awaiting more cases */
1586 //ZZ    }
1587 //ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1588 //ZZ
1589 //ZZ    if (guardT == IRTemp_INVALID) {
1590 //ZZ       /* unconditional write */
1591 //ZZ       stmt(IRStmt_Put(gsoffset, e));
1592 //ZZ    } else {
1593 //ZZ       stmt(IRStmt_Put(
1594 //ZZ          gsoffset,
1595 //ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1596 //ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
1597 //ZZ       ));
1598 //ZZ    }
1599 //ZZ }
1600 //ZZ
1601 //ZZ static IRTemp get_ITSTATE ( void )
1602 //ZZ {
1603 //ZZ    ASSERT_IS_THUMB;
1604 //ZZ    IRTemp t = newTemp(Ity_I32);
1605 //ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1606 //ZZ    return t;
1607 //ZZ }
1608 //ZZ
1609 //ZZ static void put_ITSTATE ( IRTemp t )
1610 //ZZ {
1611 //ZZ    ASSERT_IS_THUMB;
1612 //ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1613 //ZZ }
1614 //ZZ
1615 //ZZ static IRTemp get_QFLAG32 ( void )
1616 //ZZ {
1617 //ZZ    IRTemp t = newTemp(Ity_I32);
1618 //ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1619 //ZZ    return t;
1620 //ZZ }
1621 //ZZ
1622 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1623 //ZZ {
1624 //ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1625 //ZZ }
1626 //ZZ
1627 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1628 //ZZ    Status Register) to indicate that overflow or saturation occurred.
1629 //ZZ    Nb: t must be zero to denote no saturation, and any nonzero
1630 //ZZ    value to indicate saturation. */
1631 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1632 //ZZ {
1633 //ZZ    IRTemp old = get_QFLAG32();
1634 //ZZ    IRTemp nyu = newTemp(Ity_I32);
1635 //ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1636 //ZZ    put_QFLAG32(nyu, condT);
1637 //ZZ }
1638 
1639 
1640 /* ---------------- FPCR stuff ---------------- */
1641 
1642 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1643    convert them to IR format.  Bind the final result to the
1644    returned temp. */
1645 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1646 {
1647    /* The ARMvfp encoding for rounding mode bits is:
1648          00  to nearest
1649          01  to +infinity
1650          10  to -infinity
1651          11  to zero
1652       We need to convert that to the IR encoding:
1653          00  to nearest (the default)
1654          10  to +infinity
1655          01  to -infinity
1656          11  to zero
1657       Which can be done by swapping bits 0 and 1.
1658       The rmode bits are at 23:22 in FPCR.
1659    */
1660    IRTemp armEncd = newTemp(Ity_I32);
1661    IRTemp swapped = newTemp(Ity_I32);
1662    /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
1663       we don't zero out bits 24 and above, since the assignment to
1664       'swapped' will mask them out anyway. */
1665    assign(armEncd,
1666           binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1667    /* Now swap them. */
1668    assign(swapped,
1669           binop(Iop_Or32,
1670                 binop(Iop_And32,
1671                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1672                       mkU32(2)),
1673                 binop(Iop_And32,
1674                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1675                       mkU32(1))
1676          ));
1677    return swapped;
1678 }
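
/* Illustrative sketch only (not used by the decoder): the same
   bit-0/bit-1 swap done on a host value.  All four encodings map as
   listed above: 0 -> 0 (nearest), 1 -> 2 (+inf), 2 -> 1 (-inf),
   3 -> 3 (zero). */
#if 0
static UInt ref_ARM_to_IR_rmode ( UInt armEncd /* FPCR[23:22] */ )
{
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}
#endif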
1679 
1680 
1681 /*------------------------------------------------------------*/
1682 /*--- Helpers for flag handling and conditional insns      ---*/
1683 /*------------------------------------------------------------*/
1684 
1685 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1686 {
1687    switch (cond) {
1688       case ARM64CondEQ:  return "eq";
1689       case ARM64CondNE:  return "ne";
1690       case ARM64CondCS:  return "cs";  // or 'hs'
1691       case ARM64CondCC:  return "cc";  // or 'lo'
1692       case ARM64CondMI:  return "mi";
1693       case ARM64CondPL:  return "pl";
1694       case ARM64CondVS:  return "vs";
1695       case ARM64CondVC:  return "vc";
1696       case ARM64CondHI:  return "hi";
1697       case ARM64CondLS:  return "ls";
1698       case ARM64CondGE:  return "ge";
1699       case ARM64CondLT:  return "lt";
1700       case ARM64CondGT:  return "gt";
1701       case ARM64CondLE:  return "le";
1702       case ARM64CondAL:  return "al";
1703       case ARM64CondNV:  return "nv";
1704       default: vpanic("name_ARM64Condcode");
1705    }
1706 }
1707 
1708 /* and a handy shorthand for it */
1709 static const HChar* nameCC ( ARM64Condcode cond ) {
1710    return nameARM64Condcode(cond);
1711 }
1712 
1713 
1714 /* Build IR to calculate some particular condition from stored
1715    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1716    Ity_I64, suitable for narrowing.  Although the return type is
1717    Ity_I64, the returned value is either 0 or 1.  'cond' must be
1718    :: Ity_I64 and must denote the condition to compute in
1719    bits 7:4, and be zero everywhere else.
1720 */
1721 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1722 {
1723    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1724    /* And 'cond' had better produce a value in which only bits 7:4 are
1725       nonzero.  However, obviously we can't assert for that. */
1726 
1727    /* So what we're constructing for the first argument is
1728       "(cond << 4) | stored-operation".
1729       However, as per comments above, 'cond' must be supplied
1730       pre-shifted to this function.
1731 
1732       This pairing scheme requires that the ARM64_CC_OP_ values all fit
1733       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1734       8 bits of the first argument. */
1735    IRExpr** args
1736       = mkIRExprVec_4(
1737            binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1738            IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1739            IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1740            IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1741         );
1742    IRExpr* call
1743       = mkIRExprCCall(
1744            Ity_I64,
1745            0/*regparm*/,
1746            "arm64g_calculate_condition", &arm64g_calculate_condition,
1747            args
1748         );
1749 
1750    /* Exclude the requested condition, OP and NDEP from definedness
1751       checking.  We're only interested in DEP1 and DEP2. */
1752    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1753    return call;
1754 }
1755 
1756 
1757 /* Build IR to calculate some particular condition from stored
1758    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1759    Ity_I64, suitable for narrowing.  Although the return type is
1760    Ity_I64, the returned value is either 0 or 1.
1761 */
1762 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1763 {
1764   /* First arg is "(cond << 4) | stored-operation".  This requires that the
1765      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1766      (COND, OP) pair in the lowest 8 bits of the first argument. */
1767    vassert(cond >= 0 && cond <= 15);
1768    return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1769 }
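
/* A typical use, sketched (assuming a caller that wants a one-bit
   guard; illustrative only): narrow the 0/1-valued I64 result and
   select between two expressions with it. */
#if 0
   /* fragment, inside some decode routine */
   IRExpr* guard = unop(Iop_64to1,
                        mk_arm64g_calculate_condition(ARM64CondEQ));
   IRExpr* res   = IRExpr_ITE(guard, mkU64(1), mkU64(0));
#endif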
1770 
1771 
1772 /* Build IR to calculate just the carry flag from stored
1773    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1774    Ity_I64. */
1775 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1776 {
1777    IRExpr** args
1778       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1779                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1780                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1781                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1782    IRExpr* call
1783       = mkIRExprCCall(
1784            Ity_I64,
1785            0/*regparm*/,
1786            "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1787            args
1788         );
1789    /* Exclude OP and NDEP from definedness checking.  We're only
1790       interested in DEP1 and DEP2. */
1791    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1792    return call;
1793 }
1794 
1795 
1796 //ZZ /* Build IR to calculate just the overflow flag from stored
1797 //ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1798 //ZZ    Ity_I32. */
1799 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1800 //ZZ {
1801 //ZZ    IRExpr** args
1802 //ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1803 //ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1804 //ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1805 //ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1806 //ZZ    IRExpr* call
1807 //ZZ       = mkIRExprCCall(
1808 //ZZ            Ity_I32,
1809 //ZZ            0/*regparm*/,
1810 //ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
1811 //ZZ            args
1812 //ZZ         );
1813 //ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
1814 //ZZ       interested in DEP1 and DEP2. */
1815 //ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1816 //ZZ    return call;
1817 //ZZ }
1818 
1819 
1820 /* Build IR to calculate N Z C V in bits 31:28 of the
1821    returned word. */
1822 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1823 {
1824    IRExpr** args
1825       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
1826                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1827                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1828                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1829    IRExpr* call
1830       = mkIRExprCCall(
1831            Ity_I64,
1832            0/*regparm*/,
1833            "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1834            args
1835         );
1836    /* Exclude OP and NDEP from definedness checking.  We're only
1837       interested in DEP1 and DEP2. */
1838    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1839    return call;
1840 }
1841 
1842 
1843 /* Build IR to set the flags thunk, in the most general case. */
1844 static
1845 void setFlags_D1_D2_ND ( UInt cc_op,
1846                          IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1847 {
1848    vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1849    vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1850    vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1851    vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1852    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
1853    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1854    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1855    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1856 }
1857 
1858 /* Build IR to set the flags thunk after ADD or SUB. */
1859 static
1860 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1861 {
1862    IRTemp argL64 = IRTemp_INVALID;
1863    IRTemp argR64 = IRTemp_INVALID;
1864    IRTemp z64    = newTemp(Ity_I64);
1865    if (is64) {
1866       argL64 = argL;
1867       argR64 = argR;
1868    } else {
1869       argL64 = newTemp(Ity_I64);
1870       argR64 = newTemp(Ity_I64);
1871       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1872       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1873    }
1874    assign(z64, mkU64(0));
1875    UInt cc_op = ARM64G_CC_OP_NUMBER;
1876    /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
1877    else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1878    else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
1879    else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1880    else                      { vassert(0); }
1881    setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1882 }
1883 
1884 /* Build IR to set the flags thunk after ADC or SBC. */
1885 static
1886 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1887                         IRTemp argL, IRTemp argR, IRTemp oldC )
1888 {
1889    IRTemp argL64 = IRTemp_INVALID;
1890    IRTemp argR64 = IRTemp_INVALID;
1891    IRTemp oldC64 = IRTemp_INVALID;
1892    if (is64) {
1893       argL64 = argL;
1894       argR64 = argR;
1895       oldC64 = oldC;
1896    } else {
1897       argL64 = newTemp(Ity_I64);
1898       argR64 = newTemp(Ity_I64);
1899       oldC64 = newTemp(Ity_I64);
1900       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1901       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1902       assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1903    }
1904    UInt cc_op = ARM64G_CC_OP_NUMBER;
1905    /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
1906    else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1907    else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
1908    else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1909    else                      { vassert(0); }
1910    setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1911 }
1912 
1913 /* Build IR to set the flags thunk after ADD or SUB, if the given
1914    condition evaluates to True at run time.  If not, the flags are set
1915    to the specified NZCV value. */
1916 static
1917 void setFlags_ADD_SUB_conditionally (
1918         Bool is64, Bool isSUB,
1919         IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1920      )
1921 {
1922    /* Generate IR as follows:
1923         CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1924         CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1925         CC_DEP2 = ITE(cond, argR64, 0)
1926         CC_NDEP = 0
1927    */
1928 
1929    IRTemp z64 = newTemp(Ity_I64);
1930    assign(z64, mkU64(0));
1931 
1932    /* Establish the operation and operands for the True case. */
1933    IRTemp t_dep1 = IRTemp_INVALID;
1934    IRTemp t_dep2 = IRTemp_INVALID;
1935    UInt   t_op   = ARM64G_CC_OP_NUMBER;
1936    /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
1937    else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1938    else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
1939    else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1940    else                      { vassert(0); }
1941    /* */
1942    if (is64) {
1943       t_dep1 = argL;
1944       t_dep2 = argR;
1945    } else {
1946       t_dep1 = newTemp(Ity_I64);
1947       t_dep2 = newTemp(Ity_I64);
1948       assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1949       assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1950    }
1951 
1952    /* Establish the operation and operands for the False case. */
1953    IRTemp f_dep1 = newTemp(Ity_I64);
1954    IRTemp f_dep2 = z64;
1955    UInt   f_op   = ARM64G_CC_OP_COPY;
1956    assign(f_dep1, mkU64(nzcv << 28));
1957 
1958    /* Final thunk values */
1959    IRTemp dep1 = newTemp(Ity_I64);
1960    IRTemp dep2 = newTemp(Ity_I64);
1961    IRTemp op   = newTemp(Ity_I64);
1962 
1963    assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1964    assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1965    assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1966 
1967    /* finally .. */
1968    stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
1969    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1970    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1971    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1972 }
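
/* For example (illustrative only): if the condition fails at run time
   with nzcv = 0100, the thunk degenerates to
   CC_OP = ARM64G_CC_OP_COPY, CC_DEP1 = 0x40000000, CC_DEP2 = 0,
   which simply sets the flags to nZcv. */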
1973 
1974 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1975 static
1976 void setFlags_LOGIC ( Bool is64, IRTemp res )
1977 {
1978    IRTemp res64 = IRTemp_INVALID;
1979    IRTemp z64   = newTemp(Ity_I64);
1980    UInt   cc_op = ARM64G_CC_OP_NUMBER;
1981    if (is64) {
1982       res64 = res;
1983       cc_op = ARM64G_CC_OP_LOGIC64;
1984    } else {
1985       res64 = newTemp(Ity_I64);
1986       assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1987       cc_op = ARM64G_CC_OP_LOGIC32;
1988    }
1989    assign(z64, mkU64(0));
1990    setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1991 }
1992 
1993 /* Build IR to set the flags thunk to a given NZCV value.  NZCV is
1994    located in bits 31:28 of the supplied value. */
1995 static
1996 void setFlags_COPY ( IRTemp nzcv_28x0 )
1997 {
1998    IRTemp z64 = newTemp(Ity_I64);
1999    assign(z64, mkU64(0));
2000    setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2001 }
2002 
2003 
2004 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2005 //ZZ    sets it at all) */
2006 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2007 //ZZ                              IRTemp t_dep2,
2008 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2009 //ZZ {
2010 //ZZ    IRTemp z32 = newTemp(Ity_I32);
2011 //ZZ    assign( z32, mkU32(0) );
2012 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2013 //ZZ }
2014 //ZZ
2015 //ZZ
2016 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2017 //ZZ    sets it at all) */
2018 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2019 //ZZ                              IRTemp t_ndep,
2020 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2021 //ZZ {
2022 //ZZ    IRTemp z32 = newTemp(Ity_I32);
2023 //ZZ    assign( z32, mkU32(0) );
2024 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2025 //ZZ }
2026 //ZZ
2027 //ZZ
2028 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2029 //ZZ    sets them at all) */
2030 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2031 //ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2032 //ZZ {
2033 //ZZ    IRTemp z32 = newTemp(Ity_I32);
2034 //ZZ    assign( z32, mkU32(0) );
2035 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2036 //ZZ }
2037 
2038 
2039 /*------------------------------------------------------------*/
2040 /*--- Misc math helpers                                    ---*/
2041 /*------------------------------------------------------------*/
2042 
2043 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2044 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2045 {
2046    IRTemp maskT = newTemp(Ity_I64);
2047    IRTemp res   = newTemp(Ity_I64);
2048    vassert(sh >= 1 && sh <= 63);
2049    assign(maskT, mkU64(mask));
2050    assign( res,
2051            binop(Iop_Or64,
2052                  binop(Iop_Shr64,
2053                        binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2054                        mkU8(sh)),
2055                  binop(Iop_And64,
2056                        binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2057                        mkexpr(maskT))
2058                  )
2059            );
2060    return res;
2061 }
2062 
2063 /* Generates byte swaps within 32-bit lanes. */
2064 static IRTemp math_UINTSWAP64 ( IRTemp src )
2065 {
2066    IRTemp res;
2067    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2068    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2069    return res;
2070 }
2071 
2072 /* Generates byte swaps within 16-bit lanes. */
2073 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2074 {
2075    IRTemp res;
2076    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2077    return res;
2078 }
2079 
2080 /* Generates a 64-bit byte swap. */
2081 static IRTemp math_BYTESWAP64 ( IRTemp src )
2082 {
2083    IRTemp res;
2084    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2085    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2086    res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2087    return res;
2088 }
2089 
2090 /* Generates a 64-bit bit swap. */
2091 static IRTemp math_BITSWAP64 ( IRTemp src )
2092 {
2093    IRTemp res;
2094    res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2095    res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2096    res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2097    return math_BYTESWAP64(res);
2098 }
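
/* Host-side sketch of the same mask-and-shift scheme (illustrative
   only): applying it with shifts of 8, 16 and 32 gives a full 64-bit
   byte swap, e.g. 0x0102030405060708 -> 0x0807060504030201. */
#if 0
static ULong ref_SWAPHELPER ( ULong x, ULong mask, Int sh )
{
   return ((x & mask) >> sh) | ((x << sh) & mask);
}
static ULong ref_BYTESWAP64 ( ULong x )
{
   x = ref_SWAPHELPER(x, 0xFF00FF00FF00FF00ULL, 8);
   x = ref_SWAPHELPER(x, 0xFFFF0000FFFF0000ULL, 16);
   x = ref_SWAPHELPER(x, 0xFFFFFFFF00000000ULL, 32);
   return x;
}
#endif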
2099 
2100 /* Duplicates the bits at the bottom of the given word to fill the
2101    whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
2102    except for the bottom bits. */
2103 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2104 {
2105    if (srcTy == Ity_I8) {
2106       IRTemp t16 = newTemp(Ity_I64);
2107       assign(t16, binop(Iop_Or64, mkexpr(src),
2108                                   binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2109       IRTemp t32 = newTemp(Ity_I64);
2110       assign(t32, binop(Iop_Or64, mkexpr(t16),
2111                                   binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2112       IRTemp t64 = newTemp(Ity_I64);
2113       assign(t64, binop(Iop_Or64, mkexpr(t32),
2114                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2115       return t64;
2116    }
2117    if (srcTy == Ity_I16) {
2118       IRTemp t32 = newTemp(Ity_I64);
2119       assign(t32, binop(Iop_Or64, mkexpr(src),
2120                                   binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2121       IRTemp t64 = newTemp(Ity_I64);
2122       assign(t64, binop(Iop_Or64, mkexpr(t32),
2123                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2124       return t64;
2125    }
2126    if (srcTy == Ity_I32) {
2127       IRTemp t64 = newTemp(Ity_I64);
2128       assign(t64, binop(Iop_Or64, mkexpr(src),
2129                                   binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2130       return t64;
2131    }
2132    if (srcTy == Ity_I64) {
2133       return src;
2134    }
2135    vassert(0);
2136 }
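
/* For instance (illustrative only), an Ity_I8 source 0x5A is widened
   by three doubling steps: 0x5A -> 0x5A5A -> 0x5A5A5A5A ->
   0x5A5A5A5A5A5A5A5A.  The same computation on a host value: */
#if 0
static ULong ref_DUP8_TO_64 ( ULong b /* zero-extended byte */ )
{
   b |= (b << 8); b |= (b << 16); b |= (b << 32);
   return b;
}
#endif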
2137 
2138 
2139 /* Duplicates the src element exactly so as to fill a V128 value. */
2140 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2141 {
2142    IRTemp res = newTempV128();
2143    if (srcTy == Ity_F64) {
2144       IRTemp i64 = newTemp(Ity_I64);
2145       assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2146       assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2147       return res;
2148    }
2149    if (srcTy == Ity_F32) {
2150       IRTemp i64a = newTemp(Ity_I64);
2151       assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2152       IRTemp i64b = newTemp(Ity_I64);
2153       assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2154                                    mkexpr(i64a)));
2155       assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2156       return res;
2157    }
2158    if (srcTy == Ity_I64) {
2159       assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2160       return res;
2161    }
2162    if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2163       IRTemp t1 = newTemp(Ity_I64);
2164       assign(t1, widenUto64(srcTy, mkexpr(src)));
2165       IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2166       assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2167       return res;
2168    }
2169    vassert(0);
2170 }
2171 
2172 
2173 /* |fullWidth| is a full V128 width result.  Depending on bitQ,
2174    zero out the upper half. */
2175 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2176 {
2177    if (bitQ == 1) return mkexpr(fullWidth);
2178    if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2179    vassert(0);
2180 }
2181 
2182 /* The same, but from an expression instead. */
2183 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2184 {
2185    IRTemp fullWidthT = newTempV128();
2186    assign(fullWidthT, fullWidth);
2187    return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2188 }
2189 
2190 
2191 /*------------------------------------------------------------*/
2192 /*--- FP comparison helpers                                ---*/
2193 /*------------------------------------------------------------*/
2194 
2195 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2196    as an IRCmpF64Result.  Generate code to convert it to an
2197    ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2198    Assign a new temp to hold that value, and return the temp. */
2199 static
2200 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2201 {
2202    IRTemp ix       = newTemp(Ity_I64);
2203    IRTemp termL    = newTemp(Ity_I64);
2204    IRTemp termR    = newTemp(Ity_I64);
2205    IRTemp nzcv     = newTemp(Ity_I64);
2206    IRTemp irRes    = newTemp(Ity_I64);
2207 
2208    /* This is where the fun starts.  We have to convert 'irRes' from
2209       an IR-convention return result (IRCmpF64Result) to an
2210       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2211       4 bits of 'nzcv'. */
2212    /* Map compare result from IR to ARM(nzcv) */
2213    /*
2214       FP cmp result | IR   | ARM(nzcv)
2215       --------------------------------
2216       UN              0x45   0011
2217       LT              0x01   1000
2218       GT              0x00   0010
2219       EQ              0x40   0110
2220    */
2221    /* Now since you're probably wondering WTF ..
2222 
2223       ix fishes the useful bits out of the IR value, bits 6 and 0, and
2224       places them side by side, giving a number which is 0, 1, 2 or 3.
2225 
2226       termL is a sequence cooked up by GNU superopt.  It converts ix
2227          into an almost correct NZCV value (incredibly), except
2228          for the case of UN, where it produces 0100 instead of the
2229          required 0011.
2230 
2231       termR is therefore a correction term, also computed from ix.  It
2232          is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
2233          the final correct value, we subtract termR from termL.
2234 
2235       Don't take my word for it.  There's a test program at the bottom
2236       of guest_arm_toIR.c, to try this out with.
2237    */
2238    assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2239 
2240    assign(
2241       ix,
2242       binop(Iop_Or64,
2243             binop(Iop_And64,
2244                   binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2245                   mkU64(3)),
2246             binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2247 
2248    assign(
2249       termL,
2250       binop(Iop_Add64,
2251             binop(Iop_Shr64,
2252                   binop(Iop_Sub64,
2253                         binop(Iop_Shl64,
2254                               binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2255                               mkU8(62)),
2256                         mkU64(1)),
2257                   mkU8(61)),
2258             mkU64(1)));
2259 
2260    assign(
2261       termR,
2262       binop(Iop_And64,
2263             binop(Iop_And64,
2264                   mkexpr(ix),
2265                   binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2266             mkU64(1)));
2267 
2268    assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2269    return nzcv;
2270 }
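
/* A host-side check of the sequence above (illustrative only; see
   also the test program mentioned in the comment): */
#if 0
static void ref_check_IRCmpF64_to_NZCV ( void )
{
   const UInt  irRes[4] = { 0x45/*UN*/, 0x01/*LT*/, 0x00/*GT*/, 0x40/*EQ*/ };
   const ULong want[4]  = { 0x3,        0x8,        0x2,        0x6 };
   UInt i;
   for (i = 0; i < 4; i++) {
      ULong r     = irRes[i];
      ULong ix    = ((r >> 5) & 3) | (r & 1);
      ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
      ULong termR = ix & (ix >> 1) & 1;
      vassert(termL - termR == want[i]);
   }
}
#endif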
2271 
2272 
2273 /*------------------------------------------------------------*/
2274 /*--- Data processing (immediate)                          ---*/
2275 /*------------------------------------------------------------*/
2276 
2277 /* Helper functions for supporting "DecodeBitMasks" */
2278 
2279 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2280 {
2281    vassert(width > 0 && width <= 64);
2282    vassert(rot >= 0 && rot < width);
2283    if (rot == 0) return x;
2284    ULong res = x >> rot;
2285    res |= (x << (width - rot));
2286    if (width < 64)
2287      res &= ((1ULL << width) - 1);
2288    return res;
2289 }
2290 
2291 static ULong dbm_RepTo64( Int esize, ULong x )
2292 {
2293    switch (esize) {
2294       case 64:
2295          return x;
2296       case 32:
2297          x &= 0xFFFFFFFF; x |= (x << 32);
2298          return x;
2299       case 16:
2300          x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2301          return x;
2302       case 8:
2303          x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2304          return x;
2305       case 4:
2306          x &= 0xF; x |= (x << 4); x |= (x << 8);
2307          x |= (x << 16); x |= (x << 32);
2308          return x;
2309       case 2:
2310          x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2311          x |= (x << 16); x |= (x << 32);
2312          return x;
2313       default:
2314          break;
2315    }
2316    vpanic("dbm_RepTo64");
2317    /*NOTREACHED*/
2318    return 0;
2319 }
2320 
2321 static Int dbm_highestSetBit ( ULong x )
2322 {
2323    Int i;
2324    for (i = 63; i >= 0; i--) {
2325       if (x & (1ULL << i))
2326          return i;
2327    }
2328    vassert(x == 0);
2329    return -1;
2330 }
2331 
2332 static
2333 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2334                           ULong immN, ULong imms, ULong immr, Bool immediate,
2335                           UInt M /*32 or 64*/)
2336 {
2337    vassert(immN < (1ULL << 1));
2338    vassert(imms < (1ULL << 6));
2339    vassert(immr < (1ULL << 6));
2340    vassert(immediate == False || immediate == True);
2341    vassert(M == 32 || M == 64);
2342 
2343    Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2344    if (len < 1) { /* printf("fail1\n"); */ return False; }
2345    vassert(len <= 6);
2346    vassert(M >= (1 << len));
2347 
2348    vassert(len >= 1 && len <= 6);
2349    ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2350                   (1 << len) - 1;
2351    vassert(levels >= 1 && levels <= 63);
2352 
2353    if (immediate && ((imms & levels) == levels)) {
2354       /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2355       return False;
2356    }
2357 
2358    ULong S = imms & levels;
2359    ULong R = immr & levels;
2360    Int   diff = S - R;
2361    diff &= 63;
2362    Int esize = 1 << len;
2363    vassert(2 <= esize && esize <= 64);
2364 
2365    /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2366       same below with d.  S can be 63 in which case we have an out of
2367       range and hence undefined shift. */
2368    vassert(S >= 0 && S <= 63);
2369    vassert(esize >= (S+1));
2370    ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2371                   //(1ULL << (S+1)) - 1;
2372                   ((1ULL << S) - 1) + (1ULL << S);
2373 
2374    Int d = // diff<len-1:0>
2375            diff & ((1 << len)-1);
2376    vassert(esize >= (d+1));
2377    vassert(d >= 0 && d <= 63);
2378 
2379    ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2380                   //(1ULL << (d+1)) - 1;
2381                   ((1ULL << d) - 1) + (1ULL << d);
2382 
2383    if (esize != 64) vassert(elem_s < (1ULL << esize));
2384    if (esize != 64) vassert(elem_d < (1ULL << esize));
2385 
2386    if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2387    if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2388 
2389    return True;
2390 }
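
/* Worked example (illustrative only): the logical-immediate encoding
   N=0, immr=000000, imms=100111 selects a 16-bit element with S=7,
   i.e. eight ones (0x00FF), rotated by R=0 and replicated to fill
   64 bits: */
#if 0
static void dbm_example ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool  ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                 0/*immN*/, 0x27/*imms*/, 0/*immr*/,
                                 True/*immediate*/, 64/*M*/);
   vassert(ok);
   vassert(wmask == 0x00FF00FF00FF00FFULL);
   vassert(tmask == 0x00FF00FF00FF00FFULL);
}
#endif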
2391 
2392 
2393 static
2394 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2395                                          UInt insn)
2396 {
2397 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
2398 
2399    /* insn[28:23]
2400       10000x PC-rel addressing
2401       10001x Add/subtract (immediate)
2402       100100 Logical (immediate)
2403       100101 Move Wide (immediate)
2404       100110 Bitfield
2405       100111 Extract
2406    */
2407 
2408    /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2409    if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2410       Bool is64   = INSN(31,31) == 1;
2411       Bool isSub  = INSN(30,30) == 1;
2412       Bool setCC  = INSN(29,29) == 1;
2413       UInt sh     = INSN(23,22);
2414       UInt uimm12 = INSN(21,10);
2415       UInt nn     = INSN(9,5);
2416       UInt dd     = INSN(4,0);
2417       const HChar* nm = isSub ? "sub" : "add";
2418       if (sh >= 2) {
2419          /* Invalid; fall through */
2420       } else {
2421          vassert(sh <= 1);
2422          uimm12 <<= (12 * sh);
2423          if (is64) {
2424             IRTemp argL  = newTemp(Ity_I64);
2425             IRTemp argR  = newTemp(Ity_I64);
2426             IRTemp res   = newTemp(Ity_I64);
2427             assign(argL, getIReg64orSP(nn));
2428             assign(argR, mkU64(uimm12));
2429             assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
2430                                mkexpr(argL), mkexpr(argR)));
2431             if (setCC) {
2432                putIReg64orZR(dd, mkexpr(res));
2433                setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2434                DIP("%ss %s, %s, 0x%x\n",
2435                    nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2436             } else {
2437                putIReg64orSP(dd, mkexpr(res));
2438                DIP("%s %s, %s, 0x%x\n",
2439                    nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2440             }
2441          } else {
2442             IRTemp argL  = newTemp(Ity_I32);
2443             IRTemp argR  = newTemp(Ity_I32);
2444             IRTemp res   = newTemp(Ity_I32);
2445             assign(argL, getIReg32orSP(nn));
2446             assign(argR, mkU32(uimm12));
2447             assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
2448                                mkexpr(argL), mkexpr(argR)));
2449             if (setCC) {
2450                putIReg32orZR(dd, mkexpr(res));
2451                setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2452                DIP("%ss %s, %s, 0x%x\n",
2453                    nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2454             } else {
2455                putIReg32orSP(dd, mkexpr(res));
2456                DIP("%s %s, %s, 0x%x\n",
2457                    nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2458             }
2459          }
2460          return True;
2461       }
2462    }
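
   /* Decode example (illustrative only): 0x91000400 splits as
      sf=1 op=0 S=0 10001 sh=00 imm12=0x001 Rn=0 Rd=0, hence
      "add x0, x0, 0x1" with no flag update. */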
2463 
2464    /* -------------------- ADR/ADRP -------------------- */
2465    if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2466       UInt  bP    = INSN(31,31);
2467       UInt  immLo = INSN(30,29);
2468       UInt  immHi = INSN(23,5);
2469       UInt  rD    = INSN(4,0);
2470       ULong uimm  = (immHi << 2) | immLo;
2471       ULong simm  = sx_to_64(uimm, 21);
2472       ULong val;
2473       if (bP) {
2474          val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2475       } else {
2476          val = guest_PC_curr_instr + simm;
2477       }
2478       putIReg64orZR(rD, mkU64(val));
2479       DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2480       return True;
2481    }
2482 
2483    /* -------------------- LOGIC(imm) -------------------- */
2484    if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2485       /* 31 30 28     22 21   15   9  4
2486          sf op 100100 N  immr imms Rn Rd
2487            op=00: AND  Rd|SP, Rn, #imm
2488            op=01: ORR  Rd|SP, Rn, #imm
2489            op=10: EOR  Rd|SP, Rn, #imm
2490            op=11: ANDS Rd|ZR, Rn, #imm
2491       */
2492       Bool  is64 = INSN(31,31) == 1;
2493       UInt  op   = INSN(30,29);
2494       UInt  N    = INSN(22,22);
2495       UInt  immR = INSN(21,16);
2496       UInt  immS = INSN(15,10);
2497       UInt  nn   = INSN(9,5);
2498       UInt  dd   = INSN(4,0);
2499       ULong imm  = 0;
2500       Bool  ok;
2501       if (N == 1 && !is64)
2502          goto after_logic_imm; /* not allowed; fall through */
2503       ok = dbm_DecodeBitMasks(&imm, NULL,
2504                               N, immS, immR, True, is64 ? 64 : 32);
2505       if (!ok)
2506          goto after_logic_imm;
2507 
2508       const HChar* names[4] = { "and", "orr", "eor", "ands" };
2509       const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2510       const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2511 
2512       vassert(op < 4);
2513       if (is64) {
2514          IRExpr* argL = getIReg64orZR(nn);
2515          IRExpr* argR = mkU64(imm);
2516          IRTemp  res  = newTemp(Ity_I64);
2517          assign(res, binop(ops64[op], argL, argR));
2518          if (op < 3) {
2519             putIReg64orSP(dd, mkexpr(res));
2520             DIP("%s %s, %s, 0x%llx\n", names[op],
2521                 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2522          } else {
2523             putIReg64orZR(dd, mkexpr(res));
2524             setFlags_LOGIC(True/*is64*/, res);
2525             DIP("%s %s, %s, 0x%llx\n", names[op],
2526                 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2527          }
2528       } else {
2529          IRExpr* argL = getIReg32orZR(nn);
2530          IRExpr* argR = mkU32((UInt)imm);
2531          IRTemp  res  = newTemp(Ity_I32);
2532          assign(res, binop(ops32[op], argL, argR));
2533          if (op < 3) {
2534             putIReg32orSP(dd, mkexpr(res));
2535             DIP("%s %s, %s, 0x%x\n", names[op],
2536                 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2537          } else {
2538             putIReg32orZR(dd, mkexpr(res));
2539             setFlags_LOGIC(False/*!is64*/, res);
2540             DIP("%s %s, %s, 0x%x\n", names[op],
2541                 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2542          }
2543       }
2544       return True;
2545    }
2546    after_logic_imm:
2547 
2548    /* -------------------- MOV{Z,N,K} -------------------- */
2549    if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2550       /* 31 30 28      22 20    4
2551          |  |  |       |  |     |
2552          sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
2553          sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
2554          sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
2555       */
2556       Bool is64   = INSN(31,31) == 1;
2557       UInt subopc = INSN(30,29);
2558       UInt hw     = INSN(22,21);
2559       UInt imm16  = INSN(20,5);
2560       UInt dd     = INSN(4,0);
2561       if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2562          /* invalid; fall through */
2563       } else {
2564          ULong imm64 = ((ULong)imm16) << (16 * hw);
2565          if (!is64)
2566             vassert(imm64 < 0x100000000ULL);
2567          switch (subopc) {
2568             case BITS2(1,0): // MOVZ
2569                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2570                DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2571                break;
2572             case BITS2(0,0): // MOVN
2573                imm64 = ~imm64;
2574                if (!is64)
2575                   imm64 &= 0xFFFFFFFFULL;
2576                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2577                DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2578                break;
2579             case BITS2(1,1): // MOVK
2580                /* This is more complex.  We are inserting a slice into
2581                   the destination register, so we need to have the old
2582                   value of it. */
2583                if (is64) {
2584                   IRTemp old = newTemp(Ity_I64);
2585                   assign(old, getIReg64orZR(dd));
2586                   ULong mask = 0xFFFFULL << (16 * hw);
2587                   IRExpr* res
2588                      = binop(Iop_Or64,
2589                              binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2590                              mkU64(imm64));
2591                   putIReg64orZR(dd, res);
2592                   DIP("movk %s, 0x%x, lsl %u\n",
2593                       nameIReg64orZR(dd), imm16, 16*hw);
2594                } else {
2595                   IRTemp old = newTemp(Ity_I32);
2596                   assign(old, getIReg32orZR(dd));
2597                   vassert(hw <= 1);
2598                   UInt mask = ((UInt)0xFFFF) << (16 * hw);
2599                   IRExpr* res
2600                      = binop(Iop_Or32,
2601                              binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2602                              mkU32((UInt)imm64));
2603                   putIReg32orZR(dd, res);
2604                   DIP("movk %s, 0x%x, lsl %u\n",
2605                       nameIReg32orZR(dd), imm16, 16*hw);
2606                }
2607                break;
2608             default:
2609                vassert(0);
2610          }
2611          return True;
2612       }
2613    }
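
   /* Slice-insertion example (illustrative only): with the old Xd
      = 0x1111222233334444, "movk xd, #0xBEEF, lsl 16" clears the old
      bits under mask 0xFFFF0000 and ORs in 0xBEEF0000, giving
      0x11112222BEEF4444. */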
2614 
2615    /* -------------------- {U,S,}BFM -------------------- */
2616    /*    30 28     22 21   15   9  4
2617 
2618       sf 10 100110 N  immr imms nn dd
2619          UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
2620          UBFM Xd, Xn, #immr, #imms   when sf=1, N=1
2621 
2622       sf 00 100110 N  immr imms nn dd
2623          SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
2624          SBFM Xd, Xn, #immr, #imms   when sf=1, N=1
2625 
2626       sf 01 100110 N  immr imms nn dd
2627          BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
2628          BFM Xd, Xn, #immr, #imms   when sf=1, N=1
2629    */
2630    if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2631       UInt sf     = INSN(31,31);
2632       UInt opc    = INSN(30,29);
2633       UInt N      = INSN(22,22);
2634       UInt immR   = INSN(21,16);
2635       UInt immS   = INSN(15,10);
2636       UInt nn     = INSN(9,5);
2637       UInt dd     = INSN(4,0);
2638       Bool inZero = False;
2639       Bool extend = False;
2640       const HChar* nm = "???";
2641       /* skip invalid combinations */
2642       switch (opc) {
2643          case BITS2(0,0):
2644             inZero = True; extend = True; nm = "sbfm"; break;
2645          case BITS2(0,1):
2646             inZero = False; extend = False; nm = "bfm"; break;
2647          case BITS2(1,0):
2648             inZero = True; extend = False; nm = "ubfm"; break;
2649          case BITS2(1,1):
2650             goto after_bfm; /* invalid */
2651          default:
2652             vassert(0);
2653       }
2654       if (sf == 1 && N != 1) goto after_bfm;
2655       if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2656                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
2657       ULong wmask = 0, tmask = 0;
2658       Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2659                                    N, immS, immR, False, sf == 1 ? 64 : 32);
2660       if (!ok) goto after_bfm; /* hmmm */
2661 
2662       Bool   is64 = sf == 1;
2663       IRType ty   = is64 ? Ity_I64 : Ity_I32;
2664 
2665       IRTemp dst = newTemp(ty);
2666       IRTemp src = newTemp(ty);
2667       IRTemp bot = newTemp(ty);
2668       IRTemp top = newTemp(ty);
2669       IRTemp res = newTemp(ty);
2670       assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2671       assign(src, getIRegOrZR(is64, nn));
2672       /* perform bitfield move on low bits */
2673       assign(bot, binop(mkOR(ty),
2674                         binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2675                         binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2676                                          mkU(ty, wmask))));
2677       /* determine extension bits (sign, zero or dest register) */
2678       assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2679       /* combine extension bits and result bits */
2680       assign(res, binop(mkOR(ty),
2681                         binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2682                         binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2683       putIRegOrZR(is64, dd, mkexpr(res));
2684       DIP("%s %s, %s, immR=%u, immS=%u\n",
2685           nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2686       return True;
2687    }
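
   /* Worked example (illustrative only): "ubfm x0, x1, #8, #15" (the
      alias UBFX x0, x1, #8, #8) gives wmask=0xFF000000000000FF and
      tmask=0xFF.  ROR(src, 8) parks src[15:8] at bits 7:0; wmask also
      admits the rotated copy of src[7:0] at bits 63:56, but tmask then
      truncates the result to bits 7:0, i.e. to the extracted field. */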
2688    after_bfm:
2689 
2690    /* ---------------------- EXTR ---------------------- */
2691    /*   30 28     22 20 15   9 4
2692       1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
2693       0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2694    */
2695    if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2696       Bool is64  = INSN(31,31) == 1;
2697       UInt mm    = INSN(20,16);
2698       UInt imm6  = INSN(15,10);
2699       UInt nn    = INSN(9,5);
2700       UInt dd    = INSN(4,0);
2701       Bool valid = True;
2702       if (INSN(31,31) != INSN(22,22))
2703         valid = False;
2704       if (!is64 && imm6 >= 32)
2705         valid = False;
2706       if (!valid) goto after_extr;
2707       IRType ty    = is64 ? Ity_I64 : Ity_I32;
2708       IRTemp srcHi = newTemp(ty);
2709       IRTemp srcLo = newTemp(ty);
2710       IRTemp res   = newTemp(ty);
2711       assign(srcHi, getIRegOrZR(is64, nn));
2712       assign(srcLo, getIRegOrZR(is64, mm));
2713       if (imm6 == 0) {
2714         assign(res, mkexpr(srcLo));
2715       } else {
2716         UInt szBits = 8 * sizeofIRType(ty);
2717         vassert(imm6 > 0 && imm6 < szBits);
2718         assign(res, binop(mkOR(ty),
2719                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2720                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2721       }
2722       putIRegOrZR(is64, dd, mkexpr(res));
2723       DIP("extr %s, %s, %s, #%u\n",
2724           nameIRegOrZR(is64,dd),
2725           nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2726       return True;
2727    }
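
   /* For example (illustrative only): "extr x0, x1, x2, #8" computes
      (x1 << 56) | (x2 >>u 8), i.e. bits 71:8 of the 128-bit quantity
      x1:x2; with Rn == Rm it degenerates to "ror x0, x1, #8". */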
2728   after_extr:
2729 
2730    vex_printf("ARM64 front end: data_processing_immediate\n");
2731    return False;
2732 #  undef INSN
2733 }
2734 
2735 
2736 /*------------------------------------------------------------*/
2737 /*--- Data processing (register) instructions              ---*/
2738 /*------------------------------------------------------------*/
2739 
2740 static const HChar* nameSH ( UInt sh ) {
2741    switch (sh) {
2742       case 0: return "lsl";
2743       case 1: return "lsr";
2744       case 2: return "asr";
2745       case 3: return "ror";
2746       default: vassert(0);
2747    }
2748 }
2749 
2750 /* Generate IR to get a register value, possibly shifted by an
2751    immediate.  Returns either a 32- or 64-bit temporary holding the
2752    result.  After the shift, the value can optionally be NOT-ed
2753    too.
2754 
2755    sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
2756    in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
2757    isn't allowed, but it's the job of the caller to check that.
2758 */
2759 static IRTemp getShiftedIRegOrZR ( Bool is64,
2760                                    UInt sh_how, UInt sh_amt, UInt regNo,
2761                                    Bool invert )
2762 {
2763    vassert(sh_how < 4);
2764    vassert(sh_amt < (is64 ? 64 : 32));
2765    IRType ty = is64 ? Ity_I64 : Ity_I32;
2766    IRTemp t0 = newTemp(ty);
2767    assign(t0, getIRegOrZR(is64, regNo));
2768    IRTemp t1 = newTemp(ty);
2769    switch (sh_how) {
2770       case BITS2(0,0):
2771          assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2772          break;
2773       case BITS2(0,1):
2774          assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2775          break;
2776       case BITS2(1,0):
2777          assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2778          break;
2779       case BITS2(1,1):
2780          assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2781          break;
2782       default:
2783          vassert(0);
2784    }
2785    if (invert) {
2786       IRTemp t2 = newTemp(ty);
2787       assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2788       return t2;
2789    } else {
2790       return t1;
2791    }
2792 }
2793 
2794 
2795 static
2796 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2797                                         UInt insn)
2798 {
2799 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
2800 
2801    /* ------------------- ADD/SUB(reg) ------------------- */
2802    /* x==0 => 32 bit op      x==1 => 64 bit op
2803       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2804 
2805       31 30 29 28    23 21 20 15   9  4
2806       |  |  |  |     |  |  |  |    |  |
2807       x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
2808       x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
2809       x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
2810       x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
2811    */
2812    if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2813       UInt   bX    = INSN(31,31);
2814       UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
2815       UInt   bS    = INSN(29, 29); /* set flags? */
2816       UInt   sh    = INSN(23,22);
2817       UInt   rM    = INSN(20,16);
2818       UInt   imm6  = INSN(15,10);
2819       UInt   rN    = INSN(9,5);
2820       UInt   rD    = INSN(4,0);
2821       Bool   isSUB = bOP == 1;
2822       Bool   is64  = bX == 1;
2823       IRType ty    = is64 ? Ity_I64 : Ity_I32;
2824       if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2825          /* invalid; fall through */
2826       } else {
2827          IRTemp argL = newTemp(ty);
2828          assign(argL, getIRegOrZR(is64, rN));
2829          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2830          IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2831          IRTemp res  = newTemp(ty);
2832          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2833          if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2834          if (bS) {
2835             setFlags_ADD_SUB(is64, isSUB, argL, argR);
2836          }
2837          DIP("%s%s %s, %s, %s, %s #%u\n",
2838              bOP ? "sub" : "add", bS ? "s" : "",
2839              nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2840              nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2841          return True;
2842       }
2843    }
2844 
2845    /* ------------------- ADC/SBC(reg) ------------------- */
2846    /* x==0 => 32 bit op      x==1 => 64 bit op
2847 
2848       31 30 29 28    23 21 20 15     9  4
2849       |  |  |  |     |  |  |  |      |  |
2850       x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
2851       x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
2852       x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
2853       x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
2854    */
2855 
2856    if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2857       UInt   bX    = INSN(31,31);
2858       UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
2859       UInt   bS    = INSN(29,29); /* set flags */
2860       UInt   rM    = INSN(20,16);
2861       UInt   rN    = INSN(9,5);
2862       UInt   rD    = INSN(4,0);
2863 
2864       Bool   isSUB = bOP == 1;
2865       Bool   is64  = bX == 1;
2866       IRType ty    = is64 ? Ity_I64 : Ity_I32;
2867 
2868       IRTemp oldC = newTemp(ty);
2869       assign(oldC,
2870              is64 ? mk_arm64g_calculate_flag_c()
2871                   : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2872 
2873       IRTemp argL = newTemp(ty);
2874       assign(argL, getIRegOrZR(is64, rN));
2875       IRTemp argR = newTemp(ty);
2876       assign(argR, getIRegOrZR(is64, rM));
2877 
2878       IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
2879       IRTemp res  = newTemp(ty);
2880       if (isSUB) {
2881          IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2882          IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2883          assign(res,
2884                 binop(op,
2885                       binop(op, mkexpr(argL), mkexpr(argR)),
2886                       binop(xorOp, mkexpr(oldC), one)));
2887       } else {
2888          assign(res,
2889                 binop(op,
2890                       binop(op, mkexpr(argL), mkexpr(argR)),
2891                       mkexpr(oldC)));
2892       }
2893 
2894       if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2895 
2896       if (bS) {
2897          setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2898       }
2899 
2900       DIP("%s%s %s, %s, %s\n",
2901           bOP ? "sbc" : "adc", bS ? "s" : "",
2902           nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2903           nameIRegOrZR(is64, rM));
2904       return True;
2905    }
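
   /* The SBC path above computes Rn - Rm - (1 - C): XORing the old
      carry with 1 turns it into the borrow.  Host-side sketch
      (illustrative only), with oldC being 0 or 1:
         res = (rn - rm) - (oldC ^ 1);
   */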
2906 
2907    /* -------------------- LOGIC(reg) -------------------- */
2908    /* x==0 => 32 bit op      x==1 => 64 bit op
2909       N==0 => inv? is no-op (no inversion)
2910       N==1 => inv? is NOT
2911       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2912 
2913       31 30 28    23 21 20 15   9  4
2914       |  |  |     |  |  |  |    |  |
2915       x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
2916       x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
2917       x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
2918       x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
2919       With N=1, the names are: BIC ORN EON BICS
2920    */
2921    if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2922       UInt   bX   = INSN(31,31);
2923       UInt   sh   = INSN(23,22);
2924       UInt   bN   = INSN(21,21);
2925       UInt   rM   = INSN(20,16);
2926       UInt   imm6 = INSN(15,10);
2927       UInt   rN   = INSN(9,5);
2928       UInt   rD   = INSN(4,0);
2929       Bool   is64 = bX == 1;
2930       IRType ty   = is64 ? Ity_I64 : Ity_I32;
2931       if (!is64 && imm6 > 31) {
2932          /* invalid; fall through */
2933       } else {
2934          IRTemp argL = newTemp(ty);
2935          assign(argL, getIRegOrZR(is64, rN));
2936          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2937          IROp   op   = Iop_INVALID;
2938          switch (INSN(30,29)) {
2939             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2940             case BITS2(0,1):                  op = mkOR(ty);  break;
2941             case BITS2(1,0):                  op = mkXOR(ty); break;
2942             default: vassert(0);
2943          }
2944          IRTemp res = newTemp(ty);
2945          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2946          if (INSN(30,29) == BITS2(1,1)) {
2947             setFlags_LOGIC(is64, res);
2948          }
2949          putIRegOrZR(is64, rD, mkexpr(res));
2950 
2951          static const HChar* names_op[8]
2952             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2953          vassert(((bN << 2) | INSN(30,29)) < 8);
2954          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2955          /* Special-case the printing of "MOV" */
2956          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2957             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2958                                 nameIRegOrZR(is64, rM));
2959          } else {
2960             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2961                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2962                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2963          }
2964          return True;
2965       }
2966    }
2967 
2968    /* -------------------- {U,S}MULH -------------------- */
2969    /* 31       23 22 20 15     9   4
2970       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
2971       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
2972    */
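   /* Both compute the full 128-bit product of the 64-bit sources and
      keep only its upper half: a widening multiply (Iop_MullU64 or
      Iop_MullS64) followed by Iop_128HIto64. */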
2973    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2974        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2975       Bool isU = INSN(23,23) == 1;
2976       UInt mm  = INSN(20,16);
2977       UInt nn  = INSN(9,5);
2978       UInt dd  = INSN(4,0);
2979       putIReg64orZR(dd, unop(Iop_128HIto64,
2980                              binop(isU ? Iop_MullU64 : Iop_MullS64,
2981                                    getIReg64orZR(nn), getIReg64orZR(mm))));
2982       DIP("%cmulh %s, %s, %s\n",
2983           isU ? 'u' : 's',
2984           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2985       return True;
2986    }
2987 
2988    /* -------------------- M{ADD,SUB} -------------------- */
2989    /* 31 30           20 15 14 9 4
2990       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
2991       sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
2992    */
2993    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2994       Bool is64  = INSN(31,31) == 1;
2995       UInt mm    = INSN(20,16);
2996       Bool isAdd = INSN(15,15) == 0;
2997       UInt aa    = INSN(14,10);
2998       UInt nn    = INSN(9,5);
2999       UInt dd    = INSN(4,0);
3000       if (is64) {
3001          putIReg64orZR(
3002             dd,
3003             binop(isAdd ? Iop_Add64 : Iop_Sub64,
3004                   getIReg64orZR(aa),
3005                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3006       } else {
3007          putIReg32orZR(
3008             dd,
3009             binop(isAdd ? Iop_Add32 : Iop_Sub32,
3010                   getIReg32orZR(aa),
3011                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3012       }
3013       DIP("%s %s, %s, %s, %s\n",
3014           isAdd ? "madd" : "msub",
3015           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3016           nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3017       return True;
3018    }
3019 
3020    /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3021    /* 31 30 28        20 15   11 9  4
3022       sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
3023       sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
3024       sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
3025       sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
3026       In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3027    */
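   /* The common one-source aliases fall out of this encoding; for
      example CSET Rd, cond is CSINC Rd, ZR, ZR, invert(cond), and
      CNEG Rd, Rn, cond is CSNEG Rd, Rn, Rn, invert(cond). */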
3028    if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3029       Bool    is64 = INSN(31,31) == 1;
3030       UInt    b30  = INSN(30,30);
3031       UInt    mm   = INSN(20,16);
3032       UInt    cond = INSN(15,12);
3033       UInt    b10  = INSN(10,10);
3034       UInt    nn   = INSN(9,5);
3035       UInt    dd   = INSN(4,0);
3036       UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3037       IRType  ty   = is64 ? Ity_I64 : Ity_I32;
3038       IRExpr* argL = getIRegOrZR(is64, nn);
3039       IRExpr* argR = getIRegOrZR(is64, mm);
3040       switch (op) {
3041          case BITS2(0,0):
3042             break;
3043          case BITS2(0,1):
3044             argR = binop(mkADD(ty), argR, mkU(ty,1));
3045             break;
3046          case BITS2(1,0):
3047             argR = unop(mkNOT(ty), argR);
3048             break;
3049          case BITS2(1,1):
3050             argR = binop(mkSUB(ty), mkU(ty,0), argR);
3051             break;
3052          default:
3053             vassert(0);
3054       }
3055       putIRegOrZR(
3056          is64, dd,
3057          IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3058                     argL, argR)
3059       );
3060       const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3061       DIP("%s %s, %s, %s, %s\n", op_nm[op],
3062           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3063           nameIRegOrZR(is64, mm), nameCC(cond));
3064       return True;
3065    }
3066 
3067    /* -------------- ADD/SUB(extended reg) -------------- */
3068    /*     28         20 15  12   9 4
3069       000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
3070       100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
3071 
3072       001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
3073       101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
3074 
3075       010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
3076       110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
3077 
3078       011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
3079       111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
3080 
3081       The 'm' operand is extended per opt, thusly:
3082 
3083         000   Xm & 0xFF           UXTB
3084         001   Xm & 0xFFFF         UXTH
3085         010   Xm & (2^32)-1       UXTW
3086         011   Xm                  UXTX
3087 
3088         100   Xm sx from bit 7    SXTB
3089         101   Xm sx from bit 15   SXTH
3090         110   Xm sx from bit 31   SXTW
3091         111   Xm                  SXTX
3092 
3093       In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3094       operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3095       are the identity operation on Wm.
3096 
3097       After extension, the value is shifted left by imm3 bits, which
3098       may only be in the range 0 .. 4 inclusive.
3099    */
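   /* Worked example: "add x0, sp, w1, uxtw #2" zero-extends W1 to 64
      bits, shifts it left by 2 and adds it to SP -- the usual
      array-indexing idiom.  Note that SP (rather than ZR) is allowed
      as Rn, and as Rd in the non flag-setting forms. */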
3100    if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3101       Bool is64  = INSN(31,31) == 1;
3102       Bool isSub = INSN(30,30) == 1;
3103       Bool setCC = INSN(29,29) == 1;
3104       UInt mm    = INSN(20,16);
3105       UInt opt   = INSN(15,13);
3106       UInt imm3  = INSN(12,10);
3107       UInt nn    = INSN(9,5);
3108       UInt dd    = INSN(4,0);
3109       const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3110                                   "sxtb", "sxth", "sxtw", "sxtx" };
3111       /* Do almost the same thing in the 32- and 64-bit cases. */
3112       IRTemp xN = newTemp(Ity_I64);
3113       IRTemp xM = newTemp(Ity_I64);
3114       assign(xN, getIReg64orSP(nn));
3115       assign(xM, getIReg64orZR(mm));
3116       IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
3117       Int     shSX = 0;
3118       /* widen Xm .. */
3119       switch (opt) {
3120          case BITS3(0,0,0): // UXTB
3121             xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3122          case BITS3(0,0,1): // UXTH
3123             xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3124          case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3125             if (is64) {
3126                xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3127             }
3128             break;
3129          case BITS3(0,1,1): // UXTX -- always a noop
3130             break;
3131          case BITS3(1,0,0): // SXTB
3132             shSX = 56; goto sxTo64;
3133          case BITS3(1,0,1): // SXTH
3134             shSX = 48; goto sxTo64;
3135          case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3136             if (is64) {
3137                shSX = 32; goto sxTo64;
3138             }
3139             break;
3140          case BITS3(1,1,1): // SXTX -- always a noop
3141             break;
3142          sxTo64:
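         /* Shared tail for the SXTB/SXTH/SXTW cases: sign-extend the
            low (64-shSX) bits of xMw by shifting left and then
            arithmetic-shifting right by the same amount. */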
3143             vassert(shSX >= 32);
3144             xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3145                         mkU8(shSX));
3146             break;
3147          default:
3148             vassert(0);
3149       }
3150       /* and now shift */
3151       IRTemp argL = xN;
3152       IRTemp argR = newTemp(Ity_I64);
3153       assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3154       IRTemp res = newTemp(Ity_I64);
3155       assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3156                         mkexpr(argL), mkexpr(argR)));
3157       if (is64) {
3158          if (setCC) {
3159             putIReg64orZR(dd, mkexpr(res));
3160             setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3161          } else {
3162             putIReg64orSP(dd, mkexpr(res));
3163          }
3164       } else {
3165          if (setCC) {
3166             IRTemp argL32 = newTemp(Ity_I32);
3167             IRTemp argR32 = newTemp(Ity_I32);
3168             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3169             assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3170             assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3171             setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3172          } else {
3173             putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3174          }
3175       }
3176       DIP("%s%s %s, %s, %s %s lsl %u\n",
3177           isSub ? "sub" : "add", setCC ? "s" : "",
3178           setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3179           nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3180           nameExt[opt], imm3);
3181       return True;
3182    }
3183 
3184    /* ---------------- CCMP/CCMN(imm) ---------------- */
3185    /* Bizarrely, these appear in the "data processing register"
3186       category, even though they are operations against an
3187       immediate. */
3188    /* 31   29        20   15   11 9    3
3189       sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
3190       sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
3191 
3192       Operation is:
3193          (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3194          (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3195    */
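   /* Typical use is chaining branch-free comparisons.  For example
         cmp  x0, #1
         ccmp x1, #2, #0, eq
      leaves the flags from "cmp x1, #2" if x0 == 1, and NZCV = 0000
      (hence NE) otherwise, so a following B.EQ tests
      x0 == 1 && x1 == 2. */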
3196    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3197        && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3198       Bool is64  = INSN(31,31) == 1;
3199       Bool isSUB = INSN(30,30) == 1;
3200       UInt imm5  = INSN(20,16);
3201       UInt cond  = INSN(15,12);
3202       UInt nn    = INSN(9,5);
3203       UInt nzcv  = INSN(3,0);
3204 
3205       IRTemp condT = newTemp(Ity_I1);
3206       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3207 
3208       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3209       IRTemp argL = newTemp(ty);
3210       IRTemp argR = newTemp(ty);
3211 
3212       if (is64) {
3213          assign(argL, getIReg64orZR(nn));
3214          assign(argR, mkU64(imm5));
3215       } else {
3216          assign(argL, getIReg32orZR(nn));
3217          assign(argR, mkU32(imm5));
3218       }
3219       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3220 
3221       DIP("ccm%c %s, #%u, #%u, %s\n",
3222           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3223           imm5, nzcv, nameCC(cond));
3224       return True;
3225    }
3226 
3227    /* ---------------- CCMP/CCMN(reg) ---------------- */
3228    /* 31   29        20 15   11 9    3
3229       sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
3230       sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
3231       Operation is:
3232          (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3233          (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3234    */
3235    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3236        && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3237       Bool is64  = INSN(31,31) == 1;
3238       Bool isSUB = INSN(30,30) == 1;
3239       UInt mm    = INSN(20,16);
3240       UInt cond  = INSN(15,12);
3241       UInt nn    = INSN(9,5);
3242       UInt nzcv  = INSN(3,0);
3243 
3244       IRTemp condT = newTemp(Ity_I1);
3245       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3246 
3247       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3248       IRTemp argL = newTemp(ty);
3249       IRTemp argR = newTemp(ty);
3250 
3251       if (is64) {
3252          assign(argL, getIReg64orZR(nn));
3253          assign(argR, getIReg64orZR(mm));
3254       } else {
3255          assign(argL, getIReg32orZR(nn));
3256          assign(argR, getIReg32orZR(mm));
3257       }
3258       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3259 
3260       DIP("ccm%c %s, %s, #%u, %s\n",
3261           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3262           nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3263       return True;
3264    }
3265 
3266 
3267    /* -------------- REV/REV16/REV32/RBIT -------------- */
3268    /* 31 30 28       20    15   11 9 4
3269 
3270       1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
3271       0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
3272 
3273       1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
3274       0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
3275 
3276       1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
3277       0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
3278 
3279       1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
3280    */
3281    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3282        && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3283       UInt b31 = INSN(31,31);
3284       UInt opc = INSN(11,10);
3285 
3286       UInt ix = 0;
3287       /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3288       else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3289       else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3290       else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3291       else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3292       else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3293       else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3294       if (ix >= 1 && ix <= 7) {
3295          Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3296          UInt   nn    = INSN(9,5);
3297          UInt   dd    = INSN(4,0);
3298          IRTemp src   = newTemp(Ity_I64);
3299          IRTemp dst   = IRTemp_INVALID;
3300          IRTemp (*math)(IRTemp) = NULL;
3301          switch (ix) {
3302             case 1: case 2: math = math_BYTESWAP64;   break;
3303             case 3: case 4: math = math_BITSWAP64;    break;
3304             case 5: case 6: math = math_USHORTSWAP64; break;
3305             case 7:         math = math_UINTSWAP64;   break;
3306             default: vassert(0);
3307          }
3308          const HChar* names[7]
3309            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3310          const HChar* nm = names[ix-1];
3311          vassert(math);
3312          if (ix == 6) {
3313             /* This has to be special cased, since the logic below doesn't
3314                handle it correctly. */
3315             assign(src, getIReg64orZR(nn));
3316             dst = math(src);
3317             putIReg64orZR(dd,
3318                           unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3319          } else if (is64) {
3320             assign(src, getIReg64orZR(nn));
3321             dst = math(src);
3322             putIReg64orZR(dd, mkexpr(dst));
3323          } else {
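            /* 32-bit REV/RBIT: place Wn in the top half of the 64-bit
               temp, so that the 64-bit swap deposits the result in the
               low 32 bits, which are then extracted. */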
3324             assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3325             dst = math(src);
3326             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3327          }
3328          DIP("%s %s, %s\n", nm,
3329              nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3330          return True;
3331       }
3332       /* else fall through */
3333    }
3334 
3335    /* -------------------- CLZ/CLS -------------------- */
3336    /*    30 28   24   20    15      9 4
3337       sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
3338       sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
3339    */
3340    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3341        && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3342       Bool   is64  = INSN(31,31) == 1;
3343       Bool   isCLS = INSN(10,10) == 1;
3344       UInt   nn    = INSN(9,5);
3345       UInt   dd    = INSN(4,0);
3346       IRTemp src   = newTemp(Ity_I64);
3347       IRTemp srcZ  = newTemp(Ity_I64);
3348       IRTemp dst   = newTemp(Ity_I64);
3349       /* Get the argument, widened out to 64 bit */
3350       if (is64) {
3351          assign(src, getIReg64orZR(nn));
3352       } else {
3353          assign(src, binop(Iop_Shl64,
3354                            unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3355       }
3356       /* If this is CLS, transform the arg so that CLZ of it computes CLS */
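      /* The transform is srcZ[i] = src[i] ^ src[i-1] (with srcZ[0]
         = 0), so the leading-zero count of srcZ is the number of bits
         below the sign bit that equal it -- which is CLS.  All-equal
         inputs give srcZ == 0 and are handled by the ITE below. */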
3357       if (isCLS) {
3358          IRExpr* one = mkU8(1);
3359          assign(srcZ,
3360                 binop(Iop_Xor64,
3361                       binop(Iop_Shl64, mkexpr(src), one),
3362                       binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3363       } else {
3364          assign(srcZ, mkexpr(src));
3365       }
3366       /* And compute CLZ. */
3367       if (is64) {
3368          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369                                 mkU64(isCLS ? 63 : 64),
3370                                 unop(Iop_Clz64, mkexpr(srcZ))));
3371          putIReg64orZR(dd, mkexpr(dst));
3372       } else {
3373          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3374                                 mkU64(isCLS ? 31 : 32),
3375                                 unop(Iop_Clz64, mkexpr(srcZ))));
3376          putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3377       }
3378       DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3379           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3380       return True;
3381    }
3382 
3383    /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3384    /*    30 28        20 15   11 9 4
3385       sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
3386       sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
3387       sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
3388       sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
3389    */
3390    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3391        && INSN(15,12) == BITS4(0,0,1,0)) {
3392       Bool   is64 = INSN(31,31) == 1;
3393       UInt   mm   = INSN(20,16);
3394       UInt   op   = INSN(11,10);
3395       UInt   nn   = INSN(9,5);
3396       UInt   dd   = INSN(4,0);
3397       IRType ty   = is64 ? Ity_I64 : Ity_I32;
3398       IRTemp srcL = newTemp(ty);
3399       IRTemp srcR = newTemp(Ity_I64);
3400       IRTemp res  = newTemp(ty);
3401       IROp   iop  = Iop_INVALID;
3402       assign(srcL, getIRegOrZR(is64, nn));
3403       assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3404                                     mkU64(is64 ? 63 : 31)));
3405       if (op < 3) {
3406          // LSLV, LSRV, ASRV
3407          switch (op) {
3408             case BITS2(0,0): iop = mkSHL(ty); break;
3409             case BITS2(0,1): iop = mkSHR(ty); break;
3410             case BITS2(1,0): iop = mkSAR(ty); break;
3411             default: vassert(0);
3412          }
3413          assign(res, binop(iop, mkexpr(srcL),
3414                                 unop(Iop_64to8, mkexpr(srcR))));
3415       } else {
3416          // RORV
3417          IROp opSHL = mkSHL(ty);
3418          IROp opSHR = mkSHR(ty);
3419          IROp opOR  = mkOR(ty);
3420          IRExpr* width = mkU64(is64 ? 64: 32);
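         /* For srcR != 0, ror(srcL, srcR) is
            (srcL << (width - srcR)) | (srcL >> srcR).  The srcR == 0
            case is split out via the ITE so as to avoid a shift by
            the full lane width. */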
3421          assign(
3422             res,
3423             IRExpr_ITE(
3424                binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3425                mkexpr(srcL),
3426                binop(opOR,
3427                      binop(opSHL,
3428                            mkexpr(srcL),
3429                            unop(Iop_64to8, binop(Iop_Sub64, width,
3430                                                             mkexpr(srcR)))),
3431                      binop(opSHR,
3432                            mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3433          ));
3434       }
3435       putIRegOrZR(is64, dd, mkexpr(res));
3436       vassert(op < 4);
3437       const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3438       DIP("%s %s, %s, %s\n",
3439           names[op], nameIRegOrZR(is64,dd),
3440                      nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3441       return True;
3442    }
3443 
3444    /* -------------------- SDIV/UDIV -------------------- */
3445    /*    30 28        20 15    10 9 4
3446       sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
3447       sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
3448    */
3449    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3450        && INSN(15,11) == BITS5(0,0,0,0,1)) {
3451       Bool is64 = INSN(31,31) == 1;
3452       UInt mm   = INSN(20,16);
3453       Bool isS  = INSN(10,10) == 1;
3454       UInt nn   = INSN(9,5);
3455       UInt dd   = INSN(4,0);
3456       if (isS) {
3457          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3458                                      getIRegOrZR(is64, nn),
3459                                      getIRegOrZR(is64, mm)));
3460       } else {
3461          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3462                                      getIRegOrZR(is64, nn),
3463                                      getIRegOrZR(is64, mm)));
3464       }
3465       DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3466           nameIRegOrZR(is64, dd),
3467           nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3468       return True;
3469    }
3470 
3471    /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3472    /* 31        23  20 15 14 9 4
3473       1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
3474       1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
3475       1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
3476       1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
3477       with operation
3478          Xd = Xa +/- (Wn *u/s Wm)
3479    */
3480    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3481       Bool   isU   = INSN(23,23) == 1;
3482       UInt   mm    = INSN(20,16);
3483       Bool   isAdd = INSN(15,15) == 0;
3484       UInt   aa    = INSN(14,10);
3485       UInt   nn    = INSN(9,5);
3486       UInt   dd    = INSN(4,0);
3487       IRTemp wN    = newTemp(Ity_I32);
3488       IRTemp wM    = newTemp(Ity_I32);
3489       IRTemp xA    = newTemp(Ity_I64);
3490       IRTemp muld  = newTemp(Ity_I64);
3491       IRTemp res   = newTemp(Ity_I64);
3492       assign(wN, getIReg32orZR(nn));
3493       assign(wM, getIReg32orZR(mm));
3494       assign(xA, getIReg64orZR(aa));
3495       assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3496                          mkexpr(wN), mkexpr(wM)));
3497       assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3498                         mkexpr(xA), mkexpr(muld)));
3499       putIReg64orZR(dd, mkexpr(res));
3500       DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3501           nameIReg64orZR(dd), nameIReg32orZR(nn),
3502           nameIReg32orZR(mm), nameIReg64orZR(aa));
3503       return True;
3504    }
3505 
3506    /* -------------------- CRC32/CRC32C -------------------- */
3507    /* 31 30           20 15   11 9 4
3508       sf 00 1101 0110 m  0100 sz n d   CRC32<sz>  Wd, Wn, Wm|Xm
3509       sf 00 1101 0110 m  0101 sz n d   CRC32C<sz> Wd, Wn, Wm|Xm
3510    */
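   /* These are implemented by calling out to the arm64g_calc_crc32*
      helpers: both operands are widened/masked to 64 bits and passed
      via a clean call (mkIRExprCCall), and the 32-bit result is
      zero-extended back into the destination. */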
3511    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3512        && INSN(15,13) == BITS3(0,1,0)) {
3513       UInt bitSF = INSN(31,31);
3514       UInt mm    = INSN(20,16);
3515       UInt bitC  = INSN(12,12);
3516       UInt sz    = INSN(11,10);
3517       UInt nn    = INSN(9,5);
3518       UInt dd    = INSN(4,0);
3519       vassert(sz >= 0 && sz <= 3);
3520       if ((bitSF == 0 && sz <= BITS2(1,0))
3521           || (bitSF == 1 && sz == BITS2(1,1))) {
3522          UInt ix = (bitC == 1 ? 4 : 0) | sz;
3523          void* helpers[8]
3524             = { &arm64g_calc_crc32b,   &arm64g_calc_crc32h,
3525                 &arm64g_calc_crc32w,   &arm64g_calc_crc32x,
3526                 &arm64g_calc_crc32cb,  &arm64g_calc_crc32ch,
3527                 &arm64g_calc_crc32cw,  &arm64g_calc_crc32cx };
3528          const HChar* hNames[8]
3529             = { "arm64g_calc_crc32b",  "arm64g_calc_crc32h",
3530                 "arm64g_calc_crc32w",  "arm64g_calc_crc32x",
3531                 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3532                 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3533          const HChar* iNames[8]
3534             = { "crc32b",  "crc32h",  "crc32w",  "crc32x",
3535                 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3536 
3537          IRTemp srcN = newTemp(Ity_I64);
3538          assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3539 
3540          IRTemp  srcM = newTemp(Ity_I64);
3541          IRExpr* at64 = getIReg64orZR(mm);
3542          switch (sz) {
3543             case BITS2(0,0):
3544                assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3545             case BITS2(0,1):
3546                assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3547             case BITS2(1,0):
3548                assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3549             case BITS2(1,1):
3550                assign(srcM, at64); break;
3551             default:
3552                vassert(0);
3553          }
3554 
3555          vassert(ix >= 0 && ix <= 7);
3556 
3557          putIReg64orZR(
3558             dd,
3559             unop(Iop_32Uto64,
3560                  unop(Iop_64to32,
3561                       mkIRExprCCall(Ity_I64, 0/*regparm*/,
3562                                     hNames[ix], helpers[ix],
3563                                     mkIRExprVec_2(mkexpr(srcN),
3564                                                   mkexpr(srcM))))));
3565 
3566          DIP("%s %s, %s, %s\n", iNames[ix],
3567              nameIReg32orZR(dd),
3568              nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3569          return True;
3570       }
3571       /* fall through */
3572    }
3573 
3574    vex_printf("ARM64 front end: data_processing_register\n");
3575    return False;
3576 #  undef INSN
3577 }
3578 
3579 
3580 /*------------------------------------------------------------*/
3581 /*--- Math helpers for vector interleave/deinterleave      ---*/
3582 /*------------------------------------------------------------*/
3583 
3584 #define EX(_tmp) \
3585            mkexpr(_tmp)
3586 #define SL(_hi128,_lo128,_nbytes) \
3587            ( (_nbytes) == 0 \
3588                 ? (_lo128) \
3589                 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3590 #define ROR(_v128,_nbytes) \
3591            SL((_v128),(_v128),(_nbytes))
3592 #define ROL(_v128,_nbytes) \
3593            SL((_v128),(_v128),16-(_nbytes))
3594 #define SHR(_v128,_nbytes) \
3595            binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3596 #define SHL(_v128,_nbytes) \
3597            binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3598 #define ILO64x2(_argL,_argR) \
3599            binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3600 #define IHI64x2(_argL,_argR) \
3601            binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3602 #define ILO32x4(_argL,_argR) \
3603            binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3604 #define IHI32x4(_argL,_argR) \
3605            binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3606 #define ILO16x8(_argL,_argR) \
3607            binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3608 #define IHI16x8(_argL,_argR) \
3609            binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3610 #define ILO8x16(_argL,_argR) \
3611            binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3612 #define IHI8x16(_argL,_argR) \
3613            binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3614 #define CEV32x4(_argL,_argR) \
3615            binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3616 #define COD32x4(_argL,_argR) \
3617            binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3618 #define COD16x8(_argL,_argR) \
3619            binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3620 #define COD8x16(_argL,_argR) \
3621            binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3622 #define CEV8x16(_argL,_argR) \
3623            binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3624 #define AND(_arg1,_arg2) \
3625            binop(Iop_AndV128,(_arg1),(_arg2))
3626 #define OR2(_arg1,_arg2) \
3627            binop(Iop_OrV128,(_arg1),(_arg2))
3628 #define OR3(_arg1,_arg2,_arg3) \
3629            binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3630 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3631            binop(Iop_OrV128, \
3632                  binop(Iop_OrV128,(_arg1),(_arg2)), \
3633                  binop(Iop_OrV128,(_arg3),(_arg4)))
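/* A quick guide to the vocabulary above, in byte lanes (lane 0 being
   the least significant byte):

      SL(hi,lo,n)   == bytes n .. n+15 of the 32-byte concatenation hi:lo
      ROR(v,n)      == v rotated right by n bytes (byte n moves to lane 0)
      ROL(v,n)      == v rotated left by n bytes
      SHR/SHL(v,n)  == v shifted right/left by n bytes, zero filling

   For example, ROR(v,2) applied to v == Bf .. B2 B1 B0 yields
   B1 B0 Bf .. B2. */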
3634 
3635 
3636 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3637 static
3638 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3639                            UInt laneSzBlg2, IRTemp u0 )
3640 {
3641    assign(*i0, mkexpr(u0));
3642 }
3643 
3644 
3645 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3646 static
3647 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3648                            UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3649 {
3650    /* This is pretty easy, since we have primitives directly to
3651       hand. */
3652    if (laneSzBlg2 == 3) {
3653       // 64x2
3654       // u1 == B1 B0, u0 == A1 A0
3655       // i1 == B1 A1, i0 == B0 A0
3656       assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3657       assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3658       return;
3659    }
3660    if (laneSzBlg2 == 2) {
3661       // 32x4
3662       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3663       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3664       assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3665       assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3666       return;
3667    }
3668    if (laneSzBlg2 == 1) {
3669       // 16x8
3670       // u1 == B{7..0}, u0 == A{7..0}
3671       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3672       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3673       assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3674       assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3675       return;
3676    }
3677    if (laneSzBlg2 == 0) {
3678       // 8x16
3679       // u1 == B{f..0}, u0 == A{f..0}
3680       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3681       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3682       assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3683       assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3684       return;
3685    }
3686    /*NOTREACHED*/
3687    vassert(0);
3688 }
3689 
3690 
3691 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3692 static
3693 void math_INTERLEAVE3_128(
3694         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3695         UInt laneSzBlg2,
3696         IRTemp u0, IRTemp u1, IRTemp u2 )
3697 {
3698    if (laneSzBlg2 == 3) {
3699       // 64x2
3700       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3701       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3702       assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3703       assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3704       assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3705       return;
3706    }
3707 
3708    if (laneSzBlg2 == 2) {
3709       // 32x4
3710       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3711       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3712       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3713       IRTemp p0    = newTempV128();
3714       IRTemp p1    = newTempV128();
3715       IRTemp p2    = newTempV128();
3716       IRTemp c1100 = newTempV128();
3717       IRTemp c0011 = newTempV128();
3718       IRTemp c0110 = newTempV128();
3719       assign(c1100, mkV128(0xFF00));
3720       assign(c0011, mkV128(0x00FF));
3721       assign(c0110, mkV128(0x0FF0));
3722       // First interleave them at 64x2 granularity,
3723       // generating partial ("p") values.
3724       math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3725       // And more shuffling around for the final answer
3726       assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3727                        AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3728       assign(*i1, OR3( SHL(EX(p2),12),
3729                        AND(EX(p1),EX(c0110)),
3730                        SHR(EX(p0),12) ));
3731       assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3732                        AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3733       return;
3734    }
3735 
3736    if (laneSzBlg2 == 1) {
3737       // 16x8
3738       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3739       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3740       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3741       //
3742       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3743       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3744       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3745       //
3746       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3747       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3748       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3749       IRTemp p0    = newTempV128();
3750       IRTemp p1    = newTempV128();
3751       IRTemp p2    = newTempV128();
3752       IRTemp c1000 = newTempV128();
3753       IRTemp c0100 = newTempV128();
3754       IRTemp c0010 = newTempV128();
3755       IRTemp c0001 = newTempV128();
3756       assign(c1000, mkV128(0xF000));
3757       assign(c0100, mkV128(0x0F00));
3758       assign(c0010, mkV128(0x00F0));
3759       assign(c0001, mkV128(0x000F));
3760       // First interleave them at 32x4 granularity,
3761       // generating partial ("p") values.
3762       math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3763       // And more shuffling around for the final answer
3764       assign(*i2,
3765              OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
3766                   AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
3767                   AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3768                   AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3769       ));
3770       assign(*i1,
3771              OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3772                   AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
3773                   AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3774                   AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3775       ));
3776       assign(*i0,
3777              OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3778                   AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3779                   AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3780                   AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3781       ));
3782       return;
3783    }
3784 
3785    if (laneSzBlg2 == 0) {
3786       // 8x16.  It doesn't seem worth the hassle of first doing a
3787       // 16x8 interleave, so just generate all 24 partial results
3788       // directly :-(
3789       // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3790       // i2 == Cf Bf Af Ce .. Bb Ab Ca
3791       // i1 == Ba Aa C9 B9 .. A6 C5 B5
3792       // i0 == A5 C4 B4 A4 .. C0 B0 A0
3793 
3794       IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3795       IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3796       IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3797       IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3798       IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3799       IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3800       IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3801       IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3802       IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3803 
3804       // e.g. XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3805       // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3806       //
3807 #     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3808          IRTemp t_##_tempName = newTempV128(); \
3809          assign(t_##_tempName, \
3810                 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3811                          ROR(EX(_srcVec2),(_srcShift2)) ) )
3812 
3813       // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3814       IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3815 
3816       // The slicing and reassembly are done as interleavedly as possible,
3817       // so as to minimise the demand for registers in the back end, which
3818       // was observed to be a problem in testing.
3819 
3820       XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3821       XXXX(AfCe, AA, 0xf, CC, 0xe);
3822       assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3823 
3824       XXXX(BeAe, BB, 0xe, AA, 0xe);
3825       XXXX(CdBd, CC, 0xd, BB, 0xd);
3826       assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3827       assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3828 
3829       XXXX(AdCc, AA, 0xd, CC, 0xc);
3830       XXXX(BcAc, BB, 0xc, AA, 0xc);
3831       assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3832 
3833       XXXX(CbBb, CC, 0xb, BB, 0xb);
3834       XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3835       assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3836       assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3837       assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3838 
3839       XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3840       XXXX(C9B9, CC, 0x9, BB, 0x9);
3841       assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3842 
3843       XXXX(A9C8, AA, 0x9, CC, 0x8);
3844       XXXX(B8A8, BB, 0x8, AA, 0x8);
3845       assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3846       assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3847 
3848       XXXX(C7B7, CC, 0x7, BB, 0x7);
3849       XXXX(A7C6, AA, 0x7, CC, 0x6);
3850       assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3851 
3852       XXXX(B6A6, BB, 0x6, AA, 0x6);
3853       XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3854       assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3855       assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3856       assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3857 
3858       XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3859       XXXX(B4A4, BB, 0x4, AA, 0x4);
3860       assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3861 
3862       XXXX(C3B3, CC, 0x3, BB, 0x3);
3863       XXXX(A3C2, AA, 0x3, CC, 0x2);
3864       assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3865       assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3866 
3867       XXXX(B2A2, BB, 0x2, AA, 0x2);
3868       XXXX(C1B1, CC, 0x1, BB, 0x1);
3869       assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3870 
3871       XXXX(A1C0, AA, 0x1, CC, 0x0);
3872       XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3873       assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3874       assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3875       assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3876 
3877 #     undef XXXX
3878       return;
3879    }
3880 
3881    /*NOTREACHED*/
3882    vassert(0);
3883 }
3884 
3885 
3886 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3887 static
3888 void math_INTERLEAVE4_128(
3889         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3890         UInt laneSzBlg2,
3891         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3892 {
3893    if (laneSzBlg2 == 3) {
3894       // 64x2
3895       assign(*i0, ILO64x2(EX(u1), EX(u0)));
3896       assign(*i1, ILO64x2(EX(u3), EX(u2)));
3897       assign(*i2, IHI64x2(EX(u1), EX(u0)));
3898       assign(*i3, IHI64x2(EX(u3), EX(u2)));
3899       return;
3900    }
3901    if (laneSzBlg2 == 2) {
3902       // 32x4
3903       // First, interleave at the 64-bit lane size.
3904       IRTemp p0 = newTempV128();
3905       IRTemp p1 = newTempV128();
3906       IRTemp p2 = newTempV128();
3907       IRTemp p3 = newTempV128();
3908       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3909       // And interleave (cat) at the 32 bit size.
3910       assign(*i0, CEV32x4(EX(p1), EX(p0)));
3911       assign(*i1, COD32x4(EX(p1), EX(p0)));
3912       assign(*i2, CEV32x4(EX(p3), EX(p2)));
3913       assign(*i3, COD32x4(EX(p3), EX(p2)));
3914       return;
3915    }
3916    if (laneSzBlg2 == 1) {
3917       // 16x8
3918       // First, interleave at the 32-bit lane size.
3919       IRTemp p0 = newTempV128();
3920       IRTemp p1 = newTempV128();
3921       IRTemp p2 = newTempV128();
3922       IRTemp p3 = newTempV128();
3923       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3924       // And rearrange within each vector, to get the right 16 bit lanes.
3925       assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3926       assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3927       assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3928       assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3929       return;
3930    }
3931    if (laneSzBlg2 == 0) {
3932       // 8x16
3933       // First, interleave at the 16-bit lane size.
3934       IRTemp p0 = newTempV128();
3935       IRTemp p1 = newTempV128();
3936       IRTemp p2 = newTempV128();
3937       IRTemp p3 = newTempV128();
3938       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3939       // And rearrange within each vector, to get the right 8 bit lanes.
3940       assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3941       assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3942       assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3943       assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3944       return;
3945    }
3946    /*NOTREACHED*/
3947    vassert(0);
3948 }
3949 
3950 
3951 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3952 static
3953 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3954                              UInt laneSzBlg2, IRTemp i0 )
3955 {
3956    assign(*u0, mkexpr(i0));
3957 }
3958 
3959 
3960 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3961 static
3962 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3963                              UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3964 {
3965    /* This is pretty easy, since we have primitives directly to
3966       hand. */
3967    if (laneSzBlg2 == 3) {
3968       // 64x2
3969       // i1 == B1 A1, i0 == B0 A0
3970       // u1 == B1 B0, u0 == A1 A0
3971       assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3972       assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3973       return;
3974    }
3975    if (laneSzBlg2 == 2) {
3976       // 32x4
3977       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3978       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3979       assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3980       assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3981       return;
3982    }
3983    if (laneSzBlg2 == 1) {
3984       // 16x8
3985       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3986       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3987       // u1 == B{7..0}, u0 == A{7..0}
3988       assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3989       assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
3990       return;
3991    }
3992    if (laneSzBlg2 == 0) {
3993       // 8x16
3994       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3995       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3996       // u1 == B{f..0}, u0 == A{f..0}
3997       assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3998       assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
3999       return;
4000    }
4001    /*NOTREACHED*/
4002    vassert(0);
4003 }
4004 
4005 
4006 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4007 static
4008 void math_DEINTERLEAVE3_128(
4009         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4010         UInt laneSzBlg2,
4011         IRTemp i0, IRTemp i1, IRTemp i2 )
4012 {
4013    if (laneSzBlg2 == 3) {
4014       // 64x2
4015       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4016       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4017       assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
4018       assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
4019       assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
4020       return;
4021    }
4022 
4023    if (laneSzBlg2 == 2) {
4024       // 32x4
4025       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4026       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4027       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4028       IRTemp t_a1c0b0a0 = newTempV128();
4029       IRTemp t_a2c1b1a1 = newTempV128();
4030       IRTemp t_a3c2b2a2 = newTempV128();
4031       IRTemp t_a0c3b3a3 = newTempV128();
4032       IRTemp p0 = newTempV128();
4033       IRTemp p1 = newTempV128();
4034       IRTemp p2 = newTempV128();
4035       // Compute some intermediate values.
4036       assign(t_a1c0b0a0, EX(i0));
4037       assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4038       assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4039       assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4040       // First deinterleave into lane-pairs
4041       assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4042       assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4043                          IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4044       assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4045       // Then deinterleave at 64x2 granularity.
4046       math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4047       return;
4048    }
4049 
4050    if (laneSzBlg2 == 1) {
4051       // 16x8
4052       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4053       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4054       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4055       //
4056       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4057       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4058       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4059       //
4060       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4061       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4062       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4063 
4064       IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4065       s0 = s1 = s2 = s3
4066          = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4067       newTempsV128_4(&s0, &s1, &s2, &s3);
4068       newTempsV128_4(&t0, &t1, &t2, &t3);
4069       newTempsV128_4(&p0, &p1, &p2, &c00111111);
4070 
4071       // s0 == b2a2 c1b1a1 c0b0a0
4072       // s1 == b4a4 c3b3a3 c2b2a2
4073       // s2 == b6a6 c5b5a5 c4b4a4
4074       // s3 == b0a0 c7b7a7 c6b6a6
4075       assign(s0, EX(i0));
4076       assign(s1, SL(EX(i1),EX(i0),6*2));
4077       assign(s2, SL(EX(i2),EX(i1),4*2));
4078       assign(s3, SL(EX(i0),EX(i2),2*2));
4079 
4080       // t0 == 0 0 c1c0 b1b0 a1a0
4081       // t1 == 0 0 c3c2 b3b2 a3a2
4082       // t2 == 0 0 c5c4 b5b4 a5a4
4083       // t3 == 0 0 c7c6 b7b6 a7a6
4084       assign(c00111111, mkV128(0x0FFF));
4085       assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4086       assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4087       assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4088       assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4089 
4090       assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
4091       assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4092       assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4093 
4094       // Then deinterleave at 32x4 granularity.
4095       math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4096       return;
4097    }
4098 
4099    if (laneSzBlg2 == 0) {
4100       // 8x16.  This is the same scheme as for 16x8, with twice the
4101       // number of intermediate values.
4102       //
4103       // u2 == C{f..0}
4104       // u1 == B{f..0}
4105       // u0 == A{f..0}
4106       //
4107       // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4108       // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4109       // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4110       //
4111       // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4112       // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4113       // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4114       //
4115       IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4116              t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4117       s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4118          = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4119          = IRTemp_INVALID;
4120       newTempsV128_4(&s0, &s1, &s2, &s3);
4121       newTempsV128_4(&s4, &s5, &s6, &s7);
4122       newTempsV128_4(&t0, &t1, &t2, &t3);
4123       newTempsV128_4(&t4, &t5, &t6, &t7);
4124       newTempsV128_4(&p0, &p1, &p2, &cMASK);
4125 
4126       // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4127       // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4128       // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4129       // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4130       // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4131       // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4132       // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4133       // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4134       assign(s0, SL(EX(i1),EX(i0), 0));
4135       assign(s1, SL(EX(i1),EX(i0), 6));
4136       assign(s2, SL(EX(i1),EX(i0),12));
4137       assign(s3, SL(EX(i2),EX(i1), 2));
4138       assign(s4, SL(EX(i2),EX(i1), 8));
4139       assign(s5, SL(EX(i2),EX(i1),14));
4140       assign(s6, SL(EX(i0),EX(i2), 4));
4141       assign(s7, SL(EX(i0),EX(i2),10));
4142 
4143       // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4144       // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4145       // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4146       // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4147       // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4148       // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4149       // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4150       // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4151       assign(cMASK, mkV128(0x003F));
4152       assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4153       assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4154       assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4155       assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4156       assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4157       assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4158       assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4159       assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4160 
4161       assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4162       assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4163                  SHL(EX(t3),2), SHR(EX(t2),4) ));
4164       assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4165 
4166       // Then deinterleave at 16x8 granularity.
4167       math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4168       return;
4169    }
4170 
4171    /*NOTREACHED*/
4172    vassert(0);
4173 }
4174 
4175 
4176 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4177 static
4178 void math_DEINTERLEAVE4_128(
4179         /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4180         UInt laneSzBlg2,
4181         IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4182 {
4183    if (laneSzBlg2 == 3) {
4184       // 64x2
4185       assign(*u0, ILO64x2(EX(i2), EX(i0)));
4186       assign(*u1, IHI64x2(EX(i2), EX(i0)));
4187       assign(*u2, ILO64x2(EX(i3), EX(i1)));
4188       assign(*u3, IHI64x2(EX(i3), EX(i1)));
4189       return;
4190    }
4191    if (laneSzBlg2 == 2) {
4192       // 32x4
4193       IRTemp p0 = newTempV128();
4194       IRTemp p2 = newTempV128();
4195       IRTemp p1 = newTempV128();
4196       IRTemp p3 = newTempV128();
4197       assign(p0, ILO32x4(EX(i1), EX(i0)));
4198       assign(p1, IHI32x4(EX(i1), EX(i0)));
4199       assign(p2, ILO32x4(EX(i3), EX(i2)));
4200       assign(p3, IHI32x4(EX(i3), EX(i2)));
4201       // And now do what we did for the 64-bit case.
4202       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4203       return;
4204    }
4205    if (laneSzBlg2 == 1) {
4206       // 16x8
4207       // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4208       IRTemp p0 = newTempV128();
4209       IRTemp p1 = newTempV128();
4210       IRTemp p2 = newTempV128();
4211       IRTemp p3 = newTempV128();
4212       assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4213       assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4214       assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4215       assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4216       // From here on is like the 32 bit case.
4217       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4218       return;
4219    }
4220    if (laneSzBlg2 == 0) {
4221       // 8x16
4222       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4223       IRTemp p0 = newTempV128();
4224       IRTemp p1 = newTempV128();
4225       IRTemp p2 = newTempV128();
4226       IRTemp p3 = newTempV128();
4227       assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4228                           ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4229       assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4230                           ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4231       assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4232                           ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4233       assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4234                           ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4235       // From here on is like the 16 bit case.
4236       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4237       return;
4238    }
4239    /*NOTREACHED*/
4240    vassert(0);
4241 }
4242 
4243 
4244 /* Wrappers that use the full-width (de)interleavers to do half-width
4245    (de)interleaving.  The scheme is to clone each input lane in the
4246    lower half of each incoming value, do a full width (de)interleave
4247    at the next lane size up, and remove every other lane of the
4248    result.  The returned values may have any old junk in the upper
4249    64 bits -- the caller must ignore that. */
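/* Worked example for math_INTERLEAVE2_64 with 32-bit lanes
   (laneSzBlg2 == 2): given u1 == .. B1 B0 and u0 == .. A1 A0 (data in
   the low halves), doubling gives du1 == B1 B1 B0 B0 and
   du0 == A1 A1 A0 A0; the full-width 64x2 interleave then gives
   di0 == B0 B0 A0 A0 and di1 == B1 B1 A1 A1; and taking the even
   32-bit lanes leaves i0 == .. B0 A0 and i1 == .. B1 A1, with the
   upper halves to be ignored by the caller. */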
4250 
4251 /* Helper function -- get doubling and narrowing operations. */
4252 static
4253 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4254                                    /*OUT*/IROp* halver,
4255                                    UInt laneSzBlg2 )
4256 {
4257    switch (laneSzBlg2) {
4258       case 2:
4259          *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4260          break;
4261       case 1:
4262          *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4263          break;
4264       case 0:
4265          *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4266          break;
4267       default:
4268          vassert(0);
4269    }
4270 }
4271 
4272 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4273 static
4274 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4275                           UInt laneSzBlg2, IRTemp u0 )
4276 {
4277    assign(*i0, mkexpr(u0));
4278 }
4279 
4280 
4281 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4282 static
4283 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4284                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4285 {
4286    if (laneSzBlg2 == 3) {
4287       // 1x64, degenerate case
4288       assign(*i0, EX(u0));
4289       assign(*i1, EX(u1));
4290       return;
4291    }
4292 
4293    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4294    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4295    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4296 
4297    IRTemp du0 = newTempV128();
4298    IRTemp du1 = newTempV128();
4299    assign(du0, binop(doubler, EX(u0), EX(u0)));
4300    assign(du1, binop(doubler, EX(u1), EX(u1)));
4301    IRTemp di0 = newTempV128();
4302    IRTemp di1 = newTempV128();
4303    math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4304    assign(*i0, binop(halver, EX(di0), EX(di0)));
4305    assign(*i1, binop(halver, EX(di1), EX(di1)));
4306 }
4307 
4308 
4309 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4310 static
4311 void math_INTERLEAVE3_64(
4312         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4313         UInt laneSzBlg2,
4314         IRTemp u0, IRTemp u1, IRTemp u2 )
4315 {
4316    if (laneSzBlg2 == 3) {
4317       // 1x64, degenerate case
4318       assign(*i0, EX(u0));
4319       assign(*i1, EX(u1));
4320       assign(*i2, EX(u2));
4321       return;
4322    }
4323 
4324    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4325    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4326    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4327 
4328    IRTemp du0 = newTempV128();
4329    IRTemp du1 = newTempV128();
4330    IRTemp du2 = newTempV128();
4331    assign(du0, binop(doubler, EX(u0), EX(u0)));
4332    assign(du1, binop(doubler, EX(u1), EX(u1)));
4333    assign(du2, binop(doubler, EX(u2), EX(u2)));
4334    IRTemp di0 = newTempV128();
4335    IRTemp di1 = newTempV128();
4336    IRTemp di2 = newTempV128();
4337    math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4338    assign(*i0, binop(halver, EX(di0), EX(di0)));
4339    assign(*i1, binop(halver, EX(di1), EX(di1)));
4340    assign(*i2, binop(halver, EX(di2), EX(di2)));
4341 }
4342 
4343 
4344 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4345 static
4346 void math_INTERLEAVE4_64(
4347         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4348         UInt laneSzBlg2,
4349         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4350 {
4351    if (laneSzBlg2 == 3) {
4352       // 1x64, degenerate case
4353       assign(*i0, EX(u0));
4354       assign(*i1, EX(u1));
4355       assign(*i2, EX(u2));
4356       assign(*i3, EX(u3));
4357       return;
4358    }
4359 
4360    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4361    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4362    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4363 
4364    IRTemp du0 = newTempV128();
4365    IRTemp du1 = newTempV128();
4366    IRTemp du2 = newTempV128();
4367    IRTemp du3 = newTempV128();
4368    assign(du0, binop(doubler, EX(u0), EX(u0)));
4369    assign(du1, binop(doubler, EX(u1), EX(u1)));
4370    assign(du2, binop(doubler, EX(u2), EX(u2)));
4371    assign(du3, binop(doubler, EX(u3), EX(u3)));
4372    IRTemp di0 = newTempV128();
4373    IRTemp di1 = newTempV128();
4374    IRTemp di2 = newTempV128();
4375    IRTemp di3 = newTempV128();
4376    math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4377                         laneSzBlg2 + 1, du0, du1, du2, du3);
4378    assign(*i0, binop(halver, EX(di0), EX(di0)));
4379    assign(*i1, binop(halver, EX(di1), EX(di1)));
4380    assign(*i2, binop(halver, EX(di2), EX(di2)));
4381    assign(*i3, binop(halver, EX(di3), EX(di3)));
4382 }
4383 
4384 
4385 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4386 static
4387 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4388                             UInt laneSzBlg2, IRTemp i0 )
4389 {
4390    assign(*u0, mkexpr(i0));
4391 }
4392 
4393 
4394 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4395 static
4396 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4397                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4398 {
4399    if (laneSzBlg2 == 3) {
4400       // 1x64, degenerate case
4401       assign(*u0, EX(i0));
4402       assign(*u1, EX(i1));
4403       return;
4404    }
4405 
4406    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4407    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4408    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4409 
4410    IRTemp di0 = newTempV128();
4411    IRTemp di1 = newTempV128();
4412    assign(di0, binop(doubler, EX(i0), EX(i0)));
4413    assign(di1, binop(doubler, EX(i1), EX(i1)));
4414 
4415    IRTemp du0 = newTempV128();
4416    IRTemp du1 = newTempV128();
4417    math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4418    assign(*u0, binop(halver, EX(du0), EX(du0)));
4419    assign(*u1, binop(halver, EX(du1), EX(du1)));
4420 }
4421 
4422 
4423 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4424 static
4425 void math_DEINTERLEAVE3_64(
4426         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4427         UInt laneSzBlg2,
4428         IRTemp i0, IRTemp i1, IRTemp i2 )
4429 {
4430    if (laneSzBlg2 == 3) {
4431       // 1x64, degenerate case
4432       assign(*u0, EX(i0));
4433       assign(*u1, EX(i1));
4434       assign(*u2, EX(i2));
4435       return;
4436    }
4437 
4438    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4439    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4440    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4441 
4442    IRTemp di0 = newTempV128();
4443    IRTemp di1 = newTempV128();
4444    IRTemp di2 = newTempV128();
4445    assign(di0, binop(doubler, EX(i0), EX(i0)));
4446    assign(di1, binop(doubler, EX(i1), EX(i1)));
4447    assign(di2, binop(doubler, EX(i2), EX(i2)));
4448    IRTemp du0 = newTempV128();
4449    IRTemp du1 = newTempV128();
4450    IRTemp du2 = newTempV128();
4451    math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4452    assign(*u0, binop(halver, EX(du0), EX(du0)));
4453    assign(*u1, binop(halver, EX(du1), EX(du1)));
4454    assign(*u2, binop(halver, EX(du2), EX(du2)));
4455 }
4456 
4457 
4458 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4459 static
4460 void math_DEINTERLEAVE4_64(
4461         /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4462         UInt laneSzBlg2,
4463         IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4464 {
4465    if (laneSzBlg2 == 3) {
4466       // 1x64, degenerate case
4467       assign(*u0, EX(i0));
4468       assign(*u1, EX(i1));
4469       assign(*u2, EX(i2));
4470       assign(*u3, EX(i3));
4471       return;
4472    }
4473 
4474    vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4475    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4476    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4477 
4478    IRTemp di0 = newTempV128();
4479    IRTemp di1 = newTempV128();
4480    IRTemp di2 = newTempV128();
4481    IRTemp di3 = newTempV128();
4482    assign(di0, binop(doubler, EX(i0), EX(i0)));
4483    assign(di1, binop(doubler, EX(i1), EX(i1)));
4484    assign(di2, binop(doubler, EX(i2), EX(i2)));
4485    assign(di3, binop(doubler, EX(i3), EX(i3)));
4486    IRTemp du0 = newTempV128();
4487    IRTemp du1 = newTempV128();
4488    IRTemp du2 = newTempV128();
4489    IRTemp du3 = newTempV128();
4490    math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4491                           laneSzBlg2 + 1, di0, di1, di2, di3);
4492    assign(*u0, binop(halver, EX(du0), EX(du0)));
4493    assign(*u1, binop(halver, EX(du1), EX(du1)));
4494    assign(*u2, binop(halver, EX(du2), EX(du2)));
4495    assign(*u3, binop(halver, EX(du3), EX(du3)));
4496 }
4497 
4498 
4499 #undef EX
4500 #undef SL
4501 #undef ROR
4502 #undef ROL
4503 #undef SHR
4504 #undef SHL
4505 #undef ILO64x2
4506 #undef IHI64x2
4507 #undef ILO32x4
4508 #undef IHI32x4
4509 #undef ILO16x8
4510 #undef IHI16x8
4511 #undef ILO8x16
4512 #undef IHI8x16
4513 #undef CEV32x4
4514 #undef COD32x4
4515 #undef COD16x8
4516 #undef COD8x16
4517 #undef CEV8x16
4518 #undef AND
4519 #undef OR2
4520 #undef OR3
4521 #undef OR4
4522 
4523 
4524 /*------------------------------------------------------------*/
4525 /*--- Load and Store instructions                          ---*/
4526 /*------------------------------------------------------------*/
4527 
4528 /* Generate the EA for a "reg + reg" style amode.  This is done from
4529    parts of the insn, but for the sake of sanity checking it takes the whole
4530    insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
4531    and S=insn[12]:
4532 
4533    The possible forms, along with their opt:S values, are:
4534       011:0   Xn|SP + Xm
4535       111:0   Xn|SP + Xm
4536       011:1   Xn|SP + Xm * transfer_szB
4537       111:1   Xn|SP + Xm * transfer_szB
4538       010:0   Xn|SP + 32Uto64(Wm)
4539       010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
4540       110:0   Xn|SP + 32Sto64(Wm)
4541       110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
4542 
4543    Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
4544    the transfer size is insn[23,31,30].  For integer loads/stores,
4545    insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4546 
4547    If the decoding fails, it returns IRTemp_INVALID.
4548 
4549    isInt is True iff this decoding is for transfers to/from integer
4550    registers.  If False it is for transfers to/from vector registers.
4551 */
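
/* Worked example (an illustration, not an additional case): for
   opt:S == 110:1 with a 64-bit integer transfer (szLg2 == 3), the
   code below computes

      EA = Xn|SP + (32Sto64(Wm) << 3)

   i.e. Wm sign-extended and scaled by the transfer size, which is
   the "[Xn, Wm, sxtw #3]" form at the assembly level. */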
4552 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4553 {
4554    UInt    optS  = SLICE_UInt(insn, 15, 12);
4555    UInt    mm    = SLICE_UInt(insn, 20, 16);
4556    UInt    nn    = SLICE_UInt(insn, 9, 5);
4557    UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4558                    | SLICE_UInt(insn, 31, 30); // Log2 of the size
4559 
4560    buf[0] = 0;
4561 
4562    /* Sanity checks, that this really is a load/store insn. */
4563    if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4564       goto fail;
4565 
4566    if (isInt
4567        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4568        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4569        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4570        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4571       goto fail;
4572 
4573    if (!isInt
4574        && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4575       goto fail;
4576 
4577    /* Throw out non-verified but possibly valid cases. */
4578    switch (szLg2) {
4579       case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
4580       case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4581       case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4582       case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4583       case BITS3(1,0,0): // can only ever be valid for the vector case
4584                          if (isInt) goto fail; else break;
4585       case BITS3(1,0,1): // these sizes are never valid
4586       case BITS3(1,1,0):
4587       case BITS3(1,1,1): goto fail;
4588 
4589       default: vassert(0);
4590    }
4591 
4592    IRExpr* rhs  = NULL;
4593    switch (optS) {
4594       case BITS4(1,1,1,0): goto fail; //ATC
4595       case BITS4(0,1,1,0):
4596          rhs = getIReg64orZR(mm);
4597          vex_sprintf(buf, "[%s, %s]",
4598                      nameIReg64orZR(nn), nameIReg64orZR(mm));
4599          break;
4600       case BITS4(1,1,1,1): goto fail; //ATC
4601       case BITS4(0,1,1,1):
4602          rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4603          vex_sprintf(buf, "[%s, %s lsl %u]",
4604                      nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4605          break;
4606       case BITS4(0,1,0,0):
4607          rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4608          vex_sprintf(buf, "[%s, %s uxtw]",
4609                      nameIReg64orZR(nn), nameIReg32orZR(mm));
4610          break;
4611       case BITS4(0,1,0,1):
4612          rhs = binop(Iop_Shl64,
4613                      unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4614          vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
4615                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4616          break;
4617       case BITS4(1,1,0,0):
4618          rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4619          vex_sprintf(buf, "[%s, %s sxtw]",
4620                      nameIReg64orZR(nn), nameIReg32orZR(mm));
4621          break;
4622       case BITS4(1,1,0,1):
4623          rhs = binop(Iop_Shl64,
4624                      unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4625          vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
4626                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4627          break;
4628       default:
4629          /* The rest appear to be genuinely invalid */
4630          goto fail;
4631    }
4632 
4633    vassert(rhs);
4634    IRTemp res = newTemp(Ity_I64);
4635    assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4636    return res;
4637 
4638   fail:
4639    vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4640    return IRTemp_INVALID;
4641 }
4642 
4643 
4644 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4645    bits of DATAE :: Ity_I64. */
4646 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4647 {
4648    IRExpr* addrE = mkexpr(addr);
4649    switch (szB) {
4650       case 8:
4651          storeLE(addrE, dataE);
4652          break;
4653       case 4:
4654          storeLE(addrE, unop(Iop_64to32, dataE));
4655          break;
4656       case 2:
4657          storeLE(addrE, unop(Iop_64to16, dataE));
4658          break;
4659       case 1:
4660          storeLE(addrE, unop(Iop_64to8, dataE));
4661          break;
4662       default:
4663          vassert(0);
4664    }
4665 }
4666 
4667 
4668 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4669    placing the result in an Ity_I64 temporary. */
4670 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4671 {
4672    IRTemp  res   = newTemp(Ity_I64);
4673    IRExpr* addrE = mkexpr(addr);
4674    switch (szB) {
4675       case 8:
4676          assign(res, loadLE(Ity_I64,addrE));
4677          break;
4678       case 4:
4679          assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4680          break;
4681       case 2:
4682          assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4683          break;
4684       case 1:
4685          assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4686          break;
4687       default:
4688          vassert(0);
4689    }
4690    return res;
4691 }
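
/* Taken together, these two helpers give the round trip used by the
   integer LDR/STR family below.  For example (illustration only), a
   16-bit load is gen_zwidening_load(2, ta), which produces
   16Uto64(loadLE(Ity_I16, ta)), and the matching 16-bit store is
   gen_narrowing_store(2, ta, dataE), which stores 64to16(dataE). */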
4692 
4693 
4694 /* Generate a "standard 7" name, from bitQ and size.  But also
4695    allow ".1d" since that's occasionally useful. */
4696 static
4697 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4698 {
4699    vassert(bitQ <= 1 && size <= 3);
4700    const HChar* nms[8]
4701       = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4702    UInt ix = (bitQ << 2) | size;
4703    vassert(ix < 8);
4704    return nms[ix];
4705 }
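
/* For example, nameArr_Q_SZ(1, 2) indexes nms[(1 << 2) | 2] and yields
   "4s", that is, a full 128-bit register viewed as four 32-bit lanes,
   while nameArr_Q_SZ(0, 3) yields the special "1d" case noted above. */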
4706 
4707 
4708 static
4709 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4710                           const VexAbiInfo* abiinfo
4711 )
4712 {
4713 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
4714 
4715    /* ------------ LDR,STR (immediate, uimm12) ----------- */
4716    /* uimm12 is scaled by the transfer size
4717 
4718       31 29  26    21    9  4
4719       |  |   |     |     |  |
4720       11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
4721       11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
4722 
4723       10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
4724       10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
4725 
4726       01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
4727       01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
4728 
4729       00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
4730       00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
4731    */
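
   /* Worked example (an illustration): the word 0xF9400820 has
      sz == 11, bit 22 == 1 (load), imm12 == 2, nn == 1 and tt == 0,
      and so decodes as "ldr x0, [x1, #16]", the offset being imm12
      scaled by the 8-byte transfer size. */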
4732    if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4733       UInt   szLg2 = INSN(31,30);
4734       UInt   szB   = 1 << szLg2;
4735       Bool   isLD  = INSN(22,22) == 1;
4736       UInt   offs  = INSN(21,10) * szB;
4737       UInt   nn    = INSN(9,5);
4738       UInt   tt    = INSN(4,0);
4739       IRTemp ta    = newTemp(Ity_I64);
4740       assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4741       if (nn == 31) { /* FIXME generate stack alignment check */ }
4742       vassert(szLg2 < 4);
4743       if (isLD) {
4744          putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4745       } else {
4746          gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4747       }
4748       const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4749       const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4750       DIP("%s %s, [%s, #%u]\n",
4751           (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4752           nameIReg64orSP(nn), offs);
4753       return True;
4754    }
4755 
4756    /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4757    /*
4758       31 29  26      20   11 9  4
4759       |  |   |       |    |  |  |
4760       (at-Rn-then-Rn=EA)  |  |  |
4761       sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
4762       sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
4763 
4764       (at-EA-then-Rn=EA)
4765       sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
4766       sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
4767 
4768       (at-EA)
4769       sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
4770       sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
4771 
4772       simm9 is unscaled.
4773 
4774       The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
4775       load case this is because it would create two competing values for
4776       Rt.  In the store case the reason is unclear, but the spec
4777       disallows it anyway.
4778 
4779       Stores are narrowing, loads are unsigned widening.  sz encodes
4780       the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4781    */
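
   /* Worked example (an illustration): for "str x30, [sp, #-16]!"
      (pre-index, how == 11) we get simm9 == -16 and tEA == SP - 16;
      the store goes to tEA and SP is updated to tEA as well (in fact,
      for this SP-relative store, updated first; see earlyWBack below).
      For the post-index form "str x30, [sp], #-16" (how == 01) the
      store goes to the old SP and only the writeback uses SP - 16. */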
4782    if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4783        == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4784       UInt szLg2  = INSN(31,30);
4785       UInt szB    = 1 << szLg2;
4786       Bool isLoad = INSN(22,22) == 1;
4787       UInt imm9   = INSN(20,12);
4788       UInt nn     = INSN(9,5);
4789       UInt tt     = INSN(4,0);
4790       Bool wBack  = INSN(10,10) == 1;
4791       UInt how    = INSN(11,10);
4792       if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4793          /* undecodable; fall through */
4794       } else {
4795          if (nn == 31) { /* FIXME generate stack alignment check */ }
4796 
4797          // Compute the transfer address TA and the writeback address WA.
4798          IRTemp tRN = newTemp(Ity_I64);
4799          assign(tRN, getIReg64orSP(nn));
4800          IRTemp tEA = newTemp(Ity_I64);
4801          Long simm9 = (Long)sx_to_64(imm9, 9);
4802          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4803 
4804          IRTemp tTA = newTemp(Ity_I64);
4805          IRTemp tWA = newTemp(Ity_I64);
4806          switch (how) {
4807             case BITS2(0,1):
4808                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4809             case BITS2(1,1):
4810                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4811             case BITS2(0,0):
4812                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4813             default:
4814                vassert(0); /* NOTREACHED */
4815          }
4816 
4817          /* Normally rN would be updated after the transfer.  However, in
4818            the special case typified by
4819                str x30, [sp,#-16]!
4820             it is necessary to update SP before the transfer, (1)
4821             because Memcheck will otherwise complain about a write
4822             below the stack pointer, and (2) because the segfault
4823             stack extension mechanism will otherwise extend the stack
4824             only down to SP before the instruction, which might not be
4825            far enough, if the -16 offset takes the actual access
4826             address to the next page.
4827          */
4828          Bool earlyWBack
4829            = wBack && simm9 < 0 && szB == 8
4830              && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4831 
4832          if (wBack && earlyWBack)
4833             putIReg64orSP(nn, mkexpr(tEA));
4834 
4835          if (isLoad) {
4836             putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4837          } else {
4838             gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4839          }
4840 
4841          if (wBack && !earlyWBack)
4842             putIReg64orSP(nn, mkexpr(tEA));
4843 
4844          const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4845          const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4846          const HChar* fmt_str = NULL;
4847          switch (how) {
4848             case BITS2(0,1):
4849                fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4850                break;
4851             case BITS2(1,1):
4852                fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4853                break;
4854             case BITS2(0,0):
4855                fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
4856                break;
4857             default:
4858                vassert(0);
4859          }
4860          DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4861                       nameIRegOrZR(szB == 8, tt),
4862                       nameIReg64orSP(nn), simm9);
4863          return True;
4864       }
4865    }
4866 
4867    /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4868    /* L==1 => mm==LD
4869       L==0 => mm==ST
4870       x==0 => 32 bit transfers, and zero extended loads
4871       x==1 => 64 bit transfers
4872       simm7 is scaled by the (single-register) transfer size
4873 
4874       (at-Rn-then-Rn=EA)
4875       x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
4876 
4877       (at-EA-then-Rn=EA)
4878       x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
4879 
4880       (at-EA)
4881       x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
4882    */
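
   /* Worked example (an illustration): "stp x29, x30, [sp, #-112]!"
      has x == 1 and imm7 == -14; the immediate is scaled by the
      8-byte single-register size to give -112, and the two registers
      go to EA and EA+8 respectively. */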
4883    UInt insn_30_23 = INSN(30,23);
4884    if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4885        || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4886        || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4887       UInt bL     = INSN(22,22);
4888       UInt bX     = INSN(31,31);
4889       UInt bWBack = INSN(23,23);
4890       UInt rT1    = INSN(4,0);
4891       UInt rN     = INSN(9,5);
4892       UInt rT2    = INSN(14,10);
4893       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
4894       if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4895           || (bL && rT1 == rT2)) {
4896          /* undecodable; fall through */
4897       } else {
4898          if (rN == 31) { /* FIXME generate stack alignment check */ }
4899 
4900          // Compute the transfer address TA and the writeback address WA.
4901          IRTemp tRN = newTemp(Ity_I64);
4902          assign(tRN, getIReg64orSP(rN));
4903          IRTemp tEA = newTemp(Ity_I64);
4904          simm7 = (bX ? 8 : 4) * simm7;
4905          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4906 
4907          IRTemp tTA = newTemp(Ity_I64);
4908          IRTemp tWA = newTemp(Ity_I64);
4909          switch (INSN(24,23)) {
4910             case BITS2(0,1):
4911                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4912             case BITS2(1,1):
4913                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4914             case BITS2(1,0):
4915                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4916             default:
4917                vassert(0); /* NOTREACHED */
4918          }
4919 
4920          /* Normally rN would be updated after the transfer.  However, in
4921            the special case typified by
4922                stp x29, x30, [sp,#-112]!
4923             it is necessary to update SP before the transfer, (1)
4924             because Memcheck will otherwise complain about a write
4925             below the stack pointer, and (2) because the segfault
4926             stack extension mechanism will otherwise extend the stack
4927             only down to SP before the instruction, which might not be
4928            far enough, if the -112 offset takes the actual access
4929             address to the next page.
4930          */
4931          Bool earlyWBack
4932            = bWBack && simm7 < 0
4933              && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4934 
4935          if (bWBack && earlyWBack)
4936             putIReg64orSP(rN, mkexpr(tEA));
4937 
4938          /**/ if (bL == 1 && bX == 1) {
4939             // 64 bit load
4940             putIReg64orZR(rT1, loadLE(Ity_I64,
4941                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4942             putIReg64orZR(rT2, loadLE(Ity_I64,
4943                                       binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4944          } else if (bL == 1 && bX == 0) {
4945             // 32 bit load
4946             putIReg32orZR(rT1, loadLE(Ity_I32,
4947                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4948             putIReg32orZR(rT2, loadLE(Ity_I32,
4949                                       binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4950          } else if (bL == 0 && bX == 1) {
4951             // 64 bit store
4952             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4953                     getIReg64orZR(rT1));
4954             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4955                     getIReg64orZR(rT2));
4956          } else {
4957             vassert(bL == 0 && bX == 0);
4958             // 32 bit store
4959             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4960                     getIReg32orZR(rT1));
4961             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4962                     getIReg32orZR(rT2));
4963          }
4964 
4965          if (bWBack && !earlyWBack)
4966             putIReg64orSP(rN, mkexpr(tEA));
4967 
4968          const HChar* fmt_str = NULL;
4969          switch (INSN(24,23)) {
4970             case BITS2(0,1):
4971                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4972                break;
4973             case BITS2(1,1):
4974                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4975                break;
4976             case BITS2(1,0):
4977                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
4978                break;
4979             default:
4980                vassert(0);
4981          }
4982          DIP(fmt_str, bL == 0 ? "st" : "ld",
4983                       nameIRegOrZR(bX == 1, rT1),
4984                       nameIRegOrZR(bX == 1, rT2),
4985                       nameIReg64orSP(rN), simm7);
4986          return True;
4987       }
4988    }
4989 
4990    /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
4991    /* Does 32 bit transfers which are sign extended to 64 bits.
4992       simm7 is scaled by the (single-register) transfer size
4993 
4994       (at-Rn-then-Rn=EA)
4995       01 101 0001 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP], #imm
4996 
4997       (at-EA-then-Rn=EA)
4998       01 101 0011 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]!
4999 
5000       (at-EA)
5001       01 101 0010 1 imm7 Rt2 Rn Rt1  LDPSW Rt1,Rt2, [Xn|SP, #imm]
5002    */
5003    UInt insn_31_22 = INSN(31,22);
5004    if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5005        || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5006        || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5007       UInt bWBack = INSN(23,23);
5008       UInt rT1    = INSN(4,0);
5009       UInt rN     = INSN(9,5);
5010       UInt rT2    = INSN(14,10);
5011       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
5012       if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5013           || (rT1 == rT2)) {
5014          /* undecodable; fall through */
5015       } else {
5016          if (rN == 31) { /* FIXME generate stack alignment check */ }
5017 
5018          // Compute the transfer address TA and the writeback address WA.
5019          IRTemp tRN = newTemp(Ity_I64);
5020          assign(tRN, getIReg64orSP(rN));
5021          IRTemp tEA = newTemp(Ity_I64);
5022          simm7 = 4 * simm7;
5023          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5024 
5025          IRTemp tTA = newTemp(Ity_I64);
5026          IRTemp tWA = newTemp(Ity_I64);
5027          switch (INSN(24,23)) {
5028             case BITS2(0,1):
5029                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5030             case BITS2(1,1):
5031                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5032             case BITS2(1,0):
5033                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5034             default:
5035                vassert(0); /* NOTREACHED */
5036          }
5037 
5038          // 32 bit load, sign extended to 64 bits
5039          putIReg64orZR(rT1, unop(Iop_32Sto64,
5040                                  loadLE(Ity_I32, binop(Iop_Add64,
5041                                                        mkexpr(tTA),
5042                                                        mkU64(0)))));
5043          putIReg64orZR(rT2, unop(Iop_32Sto64,
5044                                  loadLE(Ity_I32, binop(Iop_Add64,
5045                                                        mkexpr(tTA),
5046                                                        mkU64(4)))));
5047          if (bWBack)
5048             putIReg64orSP(rN, mkexpr(tEA));
5049 
5050          const HChar* fmt_str = NULL;
5051          switch (INSN(24,23)) {
5052             case BITS2(0,1):
5053                fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5054                break;
5055             case BITS2(1,1):
5056                fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5057                break;
5058             case BITS2(1,0):
5059                fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-EA)\n";
5060                break;
5061             default:
5062                vassert(0);
5063          }
5064          DIP(fmt_str, nameIReg64orZR(rT1),
5065                       nameIReg64orZR(rT2),
5066                       nameIReg64orSP(rN), simm7);
5067          return True;
5068       }
5069    }
5070 
5071    /* ---------------- LDR (literal, int reg) ---------------- */
5072    /* 31 29      23    4
5073       00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
5074       01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
5075       10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5076       11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
5077       Just handles the first two cases for now.
5078    */
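
   /* Example (an illustration): with imm19 == 1 the effective address
      is PC + 4, i.e. the word immediately following this instruction. */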
5079    if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5080       UInt  imm19 = INSN(23,5);
5081       UInt  rT    = INSN(4,0);
5082       UInt  bX    = INSN(30,30);
5083       ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5084       if (bX) {
5085          putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5086       } else {
5087          putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5088       }
5089       DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5090       return True;
5091    }
5092 
5093    /* -------------- {LD,ST}R (integer register) --------------- */
5094    /* 31 29        20 15     12 11 9  4
5095       |  |         |  |      |  |  |  |
5096       11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
5097       10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
5098       01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5099       00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5100 
5101       11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
5102       10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
5103       01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
5104       00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
5105    */
5106    if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5107        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5108       HChar  dis_buf[64];
5109       UInt   szLg2 = INSN(31,30);
5110       Bool   isLD  = INSN(22,22) == 1;
5111       UInt   tt    = INSN(4,0);
5112       IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5113       if (ea != IRTemp_INVALID) {
5114          switch (szLg2) {
5115             case 3: /* 64 bit */
5116                if (isLD) {
5117                   putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5118                   DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5119                } else {
5120                   storeLE(mkexpr(ea), getIReg64orZR(tt));
5121                   DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5122                }
5123                break;
5124             case 2: /* 32 bit */
5125                if (isLD) {
5126                   putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5127                   DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5128                } else {
5129                   storeLE(mkexpr(ea), getIReg32orZR(tt));
5130                   DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5131                }
5132                break;
5133             case 1: /* 16 bit */
5134                if (isLD) {
5135                   putIReg64orZR(tt, unop(Iop_16Uto64,
5136                                          loadLE(Ity_I16, mkexpr(ea))));
5137                   DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5138                } else {
5139                   storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5140                   DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5141                }
5142                break;
5143             case 0: /* 8 bit */
5144                if (isLD) {
5145                   putIReg64orZR(tt, unop(Iop_8Uto64,
5146                                          loadLE(Ity_I8, mkexpr(ea))));
5147                   DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5148                } else {
5149                   storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5150                   DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5151                }
5152                break;
5153             default:
5154                vassert(0);
5155          }
5156          return True;
5157       }
5158    }
5159 
5160    /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5161    /* 31 29  26  23 21    9 4
5162       10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
5163       01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
5164       00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
5165       where
5166          Rt is Wt when x==1, Xt when x==0
5167    */
5168    if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5169       /* Further checks on bits 31:30 and 22 */
5170       Bool valid = False;
5171       switch ((INSN(31,30) << 1) | INSN(22,22)) {
5172          case BITS3(1,0,0):
5173          case BITS3(0,1,0): case BITS3(0,1,1):
5174          case BITS3(0,0,0): case BITS3(0,0,1):
5175             valid = True;
5176             break;
5177       }
5178       if (valid) {
5179          UInt    szLg2 = INSN(31,30);
5180          UInt    bitX  = INSN(22,22);
5181          UInt    imm12 = INSN(21,10);
5182          UInt    nn    = INSN(9,5);
5183          UInt    tt    = INSN(4,0);
5184          UInt    szB   = 1 << szLg2;
5185          IRExpr* ea    = binop(Iop_Add64,
5186                                getIReg64orSP(nn), mkU64(imm12 * szB));
5187          switch (szB) {
5188             case 4:
5189                vassert(bitX == 0);
5190                putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5191                DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5192                    nameIReg64orSP(nn), imm12 * szB);
5193                break;
5194             case 2:
5195                if (bitX == 1) {
5196                   putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5197                } else {
5198                   putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5199                }
5200                DIP("ldrsh %s, [%s, #%u]\n",
5201                    nameIRegOrZR(bitX == 0, tt),
5202                    nameIReg64orSP(nn), imm12 * szB);
5203                break;
5204             case 1:
5205                if (bitX == 1) {
5206                   putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5207                } else {
5208                   putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5209                }
5210                DIP("ldrsb %s, [%s, #%u]\n",
5211                    nameIRegOrZR(bitX == 0, tt),
5212                    nameIReg64orSP(nn), imm12 * szB);
5213                break;
5214             default:
5215                vassert(0);
5216          }
5217          return True;
5218       }
5219       /* else fall through */
5220    }
5221 
5222    /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5223    /* (at-Rn-then-Rn=EA)
5224       31 29      23 21 20   11 9 4
5225       00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
5226       01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
5227       10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
5228 
5229       (at-EA-then-Rn=EA)
5230       00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
5231       01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
5232       10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
5233       where
5234          Rt is Wt when x==1, Xt when x==0
5235          transfer-at-Rn when [11]==0, at EA when [11]==1
5236    */
5237    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5238        && INSN(21,21) == 0 && INSN(10,10) == 1) {
5239       /* Further checks on bits 31:30 and 22 */
5240       Bool valid = False;
5241       switch ((INSN(31,30) << 1) | INSN(22,22)) {
5242          case BITS3(1,0,0):                    // LDRSW Xt
5243          case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5244          case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5245             valid = True;
5246             break;
5247       }
5248       if (valid) {
5249          UInt   szLg2 = INSN(31,30);
5250          UInt   imm9  = INSN(20,12);
5251          Bool   atRN  = INSN(11,11) == 0;
5252          UInt   nn    = INSN(9,5);
5253          UInt   tt    = INSN(4,0);
5254          IRTemp tRN   = newTemp(Ity_I64);
5255          IRTemp tEA   = newTemp(Ity_I64);
5256          IRTemp tTA   = IRTemp_INVALID;
5257          ULong  simm9 = sx_to_64(imm9, 9);
5258          Bool   is64  = INSN(22,22) == 0;
5259          assign(tRN, getIReg64orSP(nn));
5260          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5261          tTA = atRN ? tRN : tEA;
5262          HChar ch = '?';
5263          /* There are 5 cases:
5264                byte     load,           SX to 64
5265                byte     load, SX to 32, ZX to 64
5266                halfword load,           SX to 64
5267                halfword load, SX to 32, ZX to 64
5268                word     load,           SX to 64
5269             The ifs below handle them in the listed order.
5270          */
5271          if (szLg2 == 0) {
5272             ch = 'b';
5273             if (is64) {
5274                putIReg64orZR(tt, unop(Iop_8Sto64,
5275                                       loadLE(Ity_I8, mkexpr(tTA))));
5276             } else {
5277                putIReg32orZR(tt, unop(Iop_8Sto32,
5278                                       loadLE(Ity_I8, mkexpr(tTA))));
5279             }
5280          }
5281          else if (szLg2 == 1) {
5282             ch = 'h';
5283             if (is64) {
5284                putIReg64orZR(tt, unop(Iop_16Sto64,
5285                                       loadLE(Ity_I16, mkexpr(tTA))));
5286             } else {
5287                putIReg32orZR(tt, unop(Iop_16Sto32,
5288                                       loadLE(Ity_I16, mkexpr(tTA))));
5289             }
5290          }
5291          else if (szLg2 == 2 && is64) {
5292             ch = 'w';
5293             putIReg64orZR(tt, unop(Iop_32Sto64,
5294                                    loadLE(Ity_I32, mkexpr(tTA))));
5295          }
5296          else {
5297             vassert(0);
5298          }
5299          putIReg64orSP(nn, mkexpr(tEA));
5300          DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5301              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5302          return True;
5303       }
5304       /* else fall through */
5305    }
5306 
5307    /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5308    /* 31 29      23 21 20   11 9 4
5309       00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
5310       01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
5311       10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
5312       where
5313          Rt is Wt when x==1, Xt when x==0
5314    */
5315    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5316        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5317       /* Further checks on bits 31:30 and 22 */
5318       Bool valid = False;
5319       switch ((INSN(31,30) << 1) | INSN(22,22)) {
5320          case BITS3(1,0,0):                    // LDURSW Xt
5321          case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5322          case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5323             valid = True;
5324             break;
5325       }
5326       if (valid) {
5327          UInt   szLg2 = INSN(31,30);
5328          UInt   imm9  = INSN(20,12);
5329          UInt   nn    = INSN(9,5);
5330          UInt   tt    = INSN(4,0);
5331          IRTemp tRN   = newTemp(Ity_I64);
5332          IRTemp tEA   = newTemp(Ity_I64);
5333          ULong  simm9 = sx_to_64(imm9, 9);
5334          Bool   is64  = INSN(22,22) == 0;
5335          assign(tRN, getIReg64orSP(nn));
5336          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5337          HChar ch = '?';
5338          /* There are 5 cases:
5339                byte     load,           SX to 64
5340                byte     load, SX to 32, ZX to 64
5341                halfword load,           SX to 64
5342                halfword load, SX to 32, ZX to 64
5343                word     load,           SX to 64
5344             The ifs below handle them in the listed order.
5345          */
5346          if (szLg2 == 0) {
5347             ch = 'b';
5348             if (is64) {
5349                putIReg64orZR(tt, unop(Iop_8Sto64,
5350                                       loadLE(Ity_I8, mkexpr(tEA))));
5351             } else {
5352                putIReg32orZR(tt, unop(Iop_8Sto32,
5353                                       loadLE(Ity_I8, mkexpr(tEA))));
5354             }
5355          }
5356          else if (szLg2 == 1) {
5357             ch = 'h';
5358             if (is64) {
5359                putIReg64orZR(tt, unop(Iop_16Sto64,
5360                                       loadLE(Ity_I16, mkexpr(tEA))));
5361             } else {
5362                putIReg32orZR(tt, unop(Iop_16Sto32,
5363                                       loadLE(Ity_I16, mkexpr(tEA))));
5364             }
5365          }
5366          else if (szLg2 == 2 && is64) {
5367             ch = 'w';
5368             putIReg64orZR(tt, unop(Iop_32Sto64,
5369                                    loadLE(Ity_I32, mkexpr(tEA))));
5370          }
5371          else {
5372             vassert(0);
5373          }
5374          DIP("ldurs%c %s, [%s, #%lld]\n",
5375              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5376          return True;
5377       }
5378       /* else fall through */
5379    }
5380 
5381    /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5382    /* L==1    => mm==LD
5383       L==0    => mm==ST
5384       sz==00  => 32 bit (S) transfers
5385       sz==01  => 64 bit (D) transfers
5386       sz==10  => 128 bit (Q) transfers
5387       sz==11  isn't allowed
5388       simm7 is scaled by the (single-register) transfer size
5389 
5390       31 29  26   22 21   14 9 4
5391 
5392       sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5393                                     (at-EA, with nontemporal hint)
5394 
5395       sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
5396                                     (at-Rn-then-Rn=EA)
5397 
5398       sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
5399                                     (at-EA)
5400 
5401       sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5402                                     (at-EA-then-Rn=EA)
5403    */
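
   /* Worked example (an illustration): "stp q0, q1, [sp, #-512]!" has
      sz == 10, so each register is 16 bytes and imm7 == -32 scales to
      -512; the pair is stored at EA and EA+16. */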
5404    if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5405       UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5406       Bool isLD   = INSN(22,22) == 1;
5407       Bool wBack  = INSN(23,23) == 1;
5408       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
5409       UInt tt2    = INSN(14,10);
5410       UInt nn     = INSN(9,5);
5411       UInt tt1    = INSN(4,0);
5412       if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5413          /* undecodable; fall through */
5414       } else {
5415          if (nn == 31) { /* FIXME generate stack alignment check */ }
5416 
5417          // Compute the transfer address TA and the writeback address WA.
5418          UInt   szB = 4 << szSlg2; /* szB is the per-register size */
5419          IRTemp tRN = newTemp(Ity_I64);
5420          assign(tRN, getIReg64orSP(nn));
5421          IRTemp tEA = newTemp(Ity_I64);
5422          simm7 = szB * simm7;
5423          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5424 
5425          IRTemp tTA = newTemp(Ity_I64);
5426          IRTemp tWA = newTemp(Ity_I64);
5427          switch (INSN(24,23)) {
5428             case BITS2(0,1):
5429                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5430             case BITS2(1,1):
5431                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5432             case BITS2(1,0):
5433             case BITS2(0,0):
5434                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5435             default:
5436                vassert(0); /* NOTREACHED */
5437          }
5438 
5439          IRType ty = Ity_INVALID;
5440          switch (szB) {
5441             case 4:  ty = Ity_F32;  break;
5442             case 8:  ty = Ity_F64;  break;
5443             case 16: ty = Ity_V128; break;
5444             default: vassert(0);
5445          }
5446 
5447          /* Normally rN would be updated after the transfer.  However, in
5448            the special cases typified by
5449                stp q0, q1, [sp,#-512]!
5450                stp d0, d1, [sp,#-512]!
5451                stp s0, s1, [sp,#-512]!
5452             it is necessary to update SP before the transfer, (1)
5453             because Memcheck will otherwise complain about a write
5454             below the stack pointer, and (2) because the segfault
5455             stack extension mechanism will otherwise extend the stack
5456             only down to SP before the instruction, which might not be
5457            far enough, if the -512 offset takes the actual access
5458             address to the next page.
5459          */
5460          Bool earlyWBack
5461            = wBack && simm7 < 0
5462              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5463 
5464          if (wBack && earlyWBack)
5465             putIReg64orSP(nn, mkexpr(tEA));
5466 
5467          if (isLD) {
5468             if (szB < 16) {
5469                putQReg128(tt1, mkV128(0x0000));
5470             }
5471             putQRegLO(tt1,
5472                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5473             if (szB < 16) {
5474                putQReg128(tt2, mkV128(0x0000));
5475             }
5476             putQRegLO(tt2,
5477                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5478          } else {
5479             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5480                     getQRegLO(tt1, ty));
5481             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5482                     getQRegLO(tt2, ty));
5483          }
5484 
5485          if (wBack && !earlyWBack)
5486             putIReg64orSP(nn, mkexpr(tEA));
5487 
5488          const HChar* fmt_str = NULL;
5489          switch (INSN(24,23)) {
5490             case BITS2(0,1):
5491                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5492                break;
5493             case BITS2(1,1):
5494                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5495                break;
5496             case BITS2(1,0):
5497                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
5498                break;
5499             case BITS2(0,0):
5500                fmt_str = "%snp %s, %s, [%s, #%lld] (at-EA, nontemporal)\n";
5501                break;
5502             default:
5503                vassert(0);
5504          }
5505          DIP(fmt_str, isLD ? "ld" : "st",
5506                       nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5507                       nameIReg64orSP(nn), simm7);
5508          return True;
5509       }
5510    }
5511 
5512    /* -------------- {LD,ST}R (vector register) --------------- */
5513    /* 31 29     23  20 15     12 11 9  4
5514       |  |      |   |  |      |  |  |  |
5515       00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
5516       01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
5517       10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
5518       11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
5519       00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
5520 
5521       00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
5522       01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
5523       10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
5524       11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
5525       00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
5526    */
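
   /* Here the transfer size is szLg2 = insn[23] : insn[31:30], giving
      0 .. 4 for the B, H, S, D and Q forms respectively; for example
      szLg2 == 4 (insn[23] == 1, insn[31:30] == 00) selects the 128-bit
      Q transfer. */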
5527    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5528        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5529       HChar  dis_buf[64];
5530       UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5531       Bool   isLD  = INSN(22,22) == 1;
5532       UInt   tt    = INSN(4,0);
5533       if (szLg2 > 4) goto after_LDR_STR_vector_register;
5534       IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5535       if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5536       switch (szLg2) {
5537          case 0: /* 8 bit */
5538             if (isLD) {
5539                putQReg128(tt, mkV128(0x0000));
5540                putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5541                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5542             } else {
5543                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5544                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5545             }
5546             break;
5547          case 1: /* 16 bit */
5548             if (isLD) {
5549                putQReg128(tt, mkV128(0x0000));
5550                putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5551                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5552             } else {
5553                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5554                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5555             }
5556             break;
5557          case 2: /* 32 bit */
5558             if (isLD) {
5559                putQReg128(tt, mkV128(0x0000));
5560                putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5561                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5562             } else {
5563                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5564                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5565             }
5566             break;
5567          case 3: /* 64 bit */
5568             if (isLD) {
5569                putQReg128(tt, mkV128(0x0000));
5570                putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5571                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5572             } else {
5573                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5574                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5575             }
5576             break;
5577          case 4: /* 128 bit */
5578             if (isLD) {
5579                putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5580                DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5581             } else {
5582                storeLE(mkexpr(ea), getQReg128(tt));
5583                DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5584             }
5585             break;
5586          default:
5587             vassert(0);
5588       }
5589       return True;
5590    }
5591   after_LDR_STR_vector_register:
5592 
5593    /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5594    /* 31 29      22 20 15  12 11 9  4
5595       |  |       |  |  |   |  |  |  |
5596       10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5597 
5598       01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5599       01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5600 
5601       00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5602       00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5603    */
5604    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5605        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5606       HChar  dis_buf[64];
5607       UInt   szLg2  = INSN(31,30);
5608       Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5609       UInt   tt     = INSN(4,0);
5610       if (szLg2 == 3) goto after_LDRS_integer_register;
5611       IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5612       if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5613       /* Enumerate the 5 variants explicitly. */
5614       if (szLg2 == 2/*32 bit*/ && sxTo64) {
5615          putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5616          DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5617          return True;
5618       }
5619       else
5620       if (szLg2 == 1/*16 bit*/) {
5621          if (sxTo64) {
5622             putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5623             DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5624          } else {
5625             putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5626             DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5627          }
5628          return True;
5629       }
5630       else
5631       if (szLg2 == 0/*8 bit*/) {
5632          if (sxTo64) {
5633             putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5634             DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5635          } else {
5636             putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5637             DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5638          }
5639          return True;
5640       }
5641       /* else it's an invalid combination */
5642    }
5643   after_LDRS_integer_register:
5644 
5645    /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5646    /* This is the Unsigned offset variant only.  The Post-Index and
5647       Pre-Index variants are below.
5648 
5649       31 29      23 21    9 4
5650       00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
5651       01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
5652       10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
5653       11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
5654       00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]
5655 
5656       00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
5657       01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
5658       10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
5659       11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
5660       00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
5661    */
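   /* Worked scaling example (operand values assumed): for LDR Qt,
      szLg2 is 4, so an encoded imm12 of 2 denotes a byte offset of
      2 << 4 = 32; "ldr q7, [x0, #32]" therefore carries 2 in its
      imm12 field.  The computation below recovers the byte offset as
      pimm12 = imm12 << szLg2. */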
5662    if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5663        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5664       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5665       Bool   isLD   = INSN(22,22) == 1;
5666       UInt   pimm12 = INSN(21,10) << szLg2;
5667       UInt   nn     = INSN(9,5);
5668       UInt   tt     = INSN(4,0);
5669       IRTemp tEA    = newTemp(Ity_I64);
5670       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5671       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5672       if (isLD) {
5673          if (szLg2 < 4) {
5674             putQReg128(tt, mkV128(0x0000));
5675          }
5676          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5677       } else {
5678          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5679       }
5680       DIP("%s %s, [%s, #%u]\n",
5681           isLD ? "ldr" : "str",
5682           nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5683       return True;
5684    }
5685 
5686    /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5687    /* These are the Post-Index and Pre-Index variants.
5688 
5689       31 29      23   20   11 9 4
5690       (at-Rn-then-Rn=EA)
5691       00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
5692       01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
5693       10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
5694       11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
5695       00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
5696 
5697       (at-EA-then-Rn=EA)
5698       00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
5699       01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
5700       10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
5701       11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
5702       00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
5703 
5704       Stores are the same except with bit 22 set to 0.
5705    */
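   /* For clarity (assembly forms shown only as illustration):
      post-index "ldr d0, [x1], #16" transfers at the old x1 and then
      sets x1 += 16, whereas pre-index "ldr d0, [x1, #16]!" transfers
      at x1 + 16 and writes that address back.  Below, atRN selects
      tRN vs tEA as the transfer address; the writeback of tEA to
      Xn|SP is common to both. */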
5706    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5707        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5708        && INSN(21,21) == 0 && INSN(10,10) == 1) {
5709       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5710       Bool   isLD   = INSN(22,22) == 1;
5711       UInt   imm9   = INSN(20,12);
5712       Bool   atRN   = INSN(11,11) == 0;
5713       UInt   nn     = INSN(9,5);
5714       UInt   tt     = INSN(4,0);
5715       IRTemp tRN    = newTemp(Ity_I64);
5716       IRTemp tEA    = newTemp(Ity_I64);
5717       IRTemp tTA    = IRTemp_INVALID;
5718       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5719       ULong  simm9  = sx_to_64(imm9, 9);
5720       assign(tRN, getIReg64orSP(nn));
5721       assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5722       tTA = atRN ? tRN : tEA;
5723       if (isLD) {
5724          if (szLg2 < 4) {
5725             putQReg128(tt, mkV128(0x0000));
5726          }
5727          putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5728       } else {
5729          storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5730       }
5731       putIReg64orSP(nn, mkexpr(tEA));
5732       DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5733           isLD ? "ldr" : "str",
5734           nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5735       return True;
5736    }
5737 
5738    /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5739    /* 31 29      23   20   11 9 4
5740       00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
5741       01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
5742       10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
5743       11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
5744       00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]
5745 
5746       00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
5747       01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
5748       10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
5749       11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
5750       00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
5751    */
5752    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5753        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5754        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5755       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
5756       Bool   isLD   = INSN(22,22) == 1;
5757       UInt   imm9   = INSN(20,12);
5758       UInt   nn     = INSN(9,5);
5759       UInt   tt     = INSN(4,0);
5760       ULong  simm9  = sx_to_64(imm9, 9);
5761       IRTemp tEA    = newTemp(Ity_I64);
5762       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
5763       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5764       if (isLD) {
5765          if (szLg2 < 4) {
5766             putQReg128(tt, mkV128(0x0000));
5767          }
5768          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5769       } else {
5770          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5771       }
5772       DIP("%s %s, [%s, #%lld]\n",
5773           isLD ? "ldur" : "stur",
5774           nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5775       return True;
5776    }
5777 
5778    /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5779    /* 31 29      23    4
5780       00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
5781       01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
5782       10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
5783    */
5784    if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5785       UInt   szB   = 4 << INSN(31,30);
5786       UInt   imm19 = INSN(23,5);
5787       UInt   tt    = INSN(4,0);
5788       ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5789       IRType ty    = preferredVectorSubTypeFromSize(szB);
5790       putQReg128(tt, mkV128(0x0000));
5791       putQRegLO(tt, loadLE(ty, mkU64(ea)));
5792       DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5793       return True;
5794    }
5795 
5796    /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg  ------ */
5797    /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5798    /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5799    /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5800    /* 31 29  26   22 21 20    15   11 9 4
5801 
5802       0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
5803       0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step
5804 
5805       0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
5806       0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step
5807 
5808       0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
5809       0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step
5810 
5811       0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
5812       0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step
5813 
5814       T    = defined by Q and sz in the normal way
5815       step = if m == 11111 then transfer-size else Xm
5816       xx   = case L of 1 -> LD ; 0 -> ST
5817    */
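   /* Interleaving example (element values assumed): for
      "ld2 {v0.4s, v1.4s}, [x0]", memory holds the element sequence
      a0 b0 a1 b1 a2 b2 a3 b3, and afterwards v0 = {a0,a1,a2,a3} and
      v1 = {b0,b1,b2,b3}.  Hence the loads below de-interleave
      (math_DEINTERLEAVE*) and the stores interleave
      (math_INTERLEAVE*). */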
5818    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5819        && INSN(21,21) == 0) {
5820       Bool bitQ  = INSN(30,30);
5821       Bool isPX  = INSN(23,23) == 1;
5822       Bool isLD  = INSN(22,22) == 1;
5823       UInt mm    = INSN(20,16);
5824       UInt opc   = INSN(15,12);
5825       UInt sz    = INSN(11,10);
5826       UInt nn    = INSN(9,5);
5827       UInt tt    = INSN(4,0);
5828       Bool isQ   = bitQ == 1;
5829       Bool is1d  = sz == BITS2(1,1) && !isQ;
5830       UInt nRegs = 0;
5831       switch (opc) {
5832          case BITS4(0,0,0,0): nRegs = 4; break;
5833          case BITS4(0,1,0,0): nRegs = 3; break;
5834          case BITS4(1,0,0,0): nRegs = 2; break;
5835          case BITS4(0,1,1,1): nRegs = 1; break;
5836          default: break;
5837       }
5838 
5839       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5840          If we see it, set nRegs to 0 so as to cause the next conditional
5841          to fail. */
5842       if (!isPX && mm != 0)
5843          nRegs = 0;
5844 
5845       if (nRegs == 1                             /* .1d is allowed */
5846           || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5847 
5848          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5849 
5850          /* Generate the transfer address (TA) and if necessary the
5851             writeback address (WB) */
5852          IRTemp tTA = newTemp(Ity_I64);
5853          assign(tTA, getIReg64orSP(nn));
5854          if (nn == 31) { /* FIXME generate stack alignment check */ }
5855          IRTemp tWB = IRTemp_INVALID;
5856          if (isPX) {
5857             tWB = newTemp(Ity_I64);
5858             assign(tWB, binop(Iop_Add64,
5859                               mkexpr(tTA),
5860                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5861                                                      : getIReg64orZR(mm)));
5862          }
5863 
5864          /* -- BEGIN generate the transfers -- */
5865 
5866          IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5867          u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5868          switch (nRegs) {
5869             case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5870             case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5871             case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5872             case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5873             default: vassert(0);
5874          }
5875 
5876          /* -- Multiple 128 or 64 bit stores -- */
5877          if (!isLD) {
5878             switch (nRegs) {
5879                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5880                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5881                case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5882                case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5883                default: vassert(0);
5884             }
5885             switch (nRegs) {
5886                case 4:  (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5887                            (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5888                         break;
5889                case 3:  (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5890                            (&i0, &i1, &i2, sz, u0, u1, u2);
5891                         break;
5892                case 2:  (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5893                            (&i0, &i1, sz, u0, u1);
5894                         break;
5895                case 1:  (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5896                            (&i0, sz, u0);
5897                         break;
5898                default: vassert(0);
5899             }
5900 #           define MAYBE_NARROW_TO_64(_expr) \
5901                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5902             UInt step = isQ ? 16 : 8;
5903             switch (nRegs) {
5904                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5905                                  MAYBE_NARROW_TO_64(mkexpr(i3)) );
5906                         /* fallthru */
5907                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5908                                  MAYBE_NARROW_TO_64(mkexpr(i2)) );
5909                         /* fallthru */
5910                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5911                                  MAYBE_NARROW_TO_64(mkexpr(i1)) );
5912                         /* fallthru */
5913                case 1:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5914                                  MAYBE_NARROW_TO_64(mkexpr(i0)) );
5915                         break;
5916                default: vassert(0);
5917             }
5918 #           undef MAYBE_NARROW_TO_64
5919          }
5920 
5921          /* -- Multiple 128 or 64 bit loads -- */
5922          else /* isLD */ {
5923             UInt   step   = isQ ? 16 : 8;
5924             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5925 #           define MAYBE_WIDEN_FROM_64(_expr) \
5926                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5927             switch (nRegs) {
5928                case 4:
5929                   assign(i3, MAYBE_WIDEN_FROM_64(
5930                                 loadLE(loadTy,
5931                                        binop(Iop_Add64, mkexpr(tTA),
5932                                                         mkU64(3 * step)))));
5933                   /* fallthru */
5934                case 3:
5935                   assign(i2, MAYBE_WIDEN_FROM_64(
5936                                 loadLE(loadTy,
5937                                        binop(Iop_Add64, mkexpr(tTA),
5938                                                         mkU64(2 * step)))));
5939                   /* fallthru */
5940                case 2:
5941                   assign(i1, MAYBE_WIDEN_FROM_64(
5942                                 loadLE(loadTy,
5943                                        binop(Iop_Add64, mkexpr(tTA),
5944                                                         mkU64(1 * step)))));
5945                   /* fallthru */
5946                case 1:
5947                   assign(i0, MAYBE_WIDEN_FROM_64(
5948                                 loadLE(loadTy,
5949                                        binop(Iop_Add64, mkexpr(tTA),
5950                                                         mkU64(0 * step)))));
5951                   break;
5952                default:
5953                   vassert(0);
5954             }
5955 #           undef MAYBE_WIDEN_FROM_64
5956             switch (nRegs) {
5957                case 4:  (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5958                            (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5959                         break;
5960                case 3:  (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5961                            (&u0, &u1, &u2, sz, i0, i1, i2);
5962                         break;
5963                case 2:  (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5964                            (&u0, &u1, sz, i0, i1);
5965                         break;
5966                case 1:  (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5967                            (&u0, sz, i0);
5968                         break;
5969                default: vassert(0);
5970             }
5971             switch (nRegs) {
5972                case 4:  putQReg128( (tt+3) % 32,
5973                                     math_MAYBE_ZERO_HI64(bitQ, u3));
5974                         /* fallthru */
5975                case 3:  putQReg128( (tt+2) % 32,
5976                                     math_MAYBE_ZERO_HI64(bitQ, u2));
5977                         /* fallthru */
5978                case 2:  putQReg128( (tt+1) % 32,
5979                                     math_MAYBE_ZERO_HI64(bitQ, u1));
5980                         /* fallthru */
5981                case 1:  putQReg128( (tt+0) % 32,
5982                                     math_MAYBE_ZERO_HI64(bitQ, u0));
5983                         break;
5984                default: vassert(0);
5985             }
5986          }
5987 
5988          /* -- END generate the transfers -- */
5989 
5990          /* Do the writeback, if necessary */
5991          if (isPX) {
5992             putIReg64orSP(nn, mkexpr(tWB));
5993          }
5994 
5995          HChar pxStr[20];
5996          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5997          if (isPX) {
5998             if (mm == BITS5(1,1,1,1,1))
5999                vex_sprintf(pxStr, ", #%u", xferSzB);
6000             else
6001                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6002          }
6003          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6004          DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6005              isLD ? "ld" : "st", nRegs,
6006              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6007              pxStr);
6008 
6009          return True;
6010       }
6011       /* else fall through */
6012    }
6013 
6014    /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs  ------ */
6015    /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs  ------ */
6016    /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs  ------ */
6017    /* 31 29  26   22 21 20    15   11 9 4
6018 
6019       0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
6020       0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step
6021 
6022       0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
6023       0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step
6024 
6025       0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
6026       0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step
6027 
6028       T    = defined by Q and sz in the normal way
6029       step = if m == 11111 then transfer-size else Xm
6030       xx   = case L of 1 -> LD ; 0 -> ST
6031    */
6032    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6033        && INSN(21,21) == 0) {
6034       Bool bitQ  = INSN(30,30);
6035       Bool isPX  = INSN(23,23) == 1;
6036       Bool isLD  = INSN(22,22) == 1;
6037       UInt mm    = INSN(20,16);
6038       UInt opc   = INSN(15,12);
6039       UInt sz    = INSN(11,10);
6040       UInt nn    = INSN(9,5);
6041       UInt tt    = INSN(4,0);
6042       Bool isQ   = bitQ == 1;
6043       UInt nRegs = 0;
6044       switch (opc) {
6045          case BITS4(0,0,1,0): nRegs = 4; break;
6046          case BITS4(0,1,1,0): nRegs = 3; break;
6047          case BITS4(1,0,1,0): nRegs = 2; break;
6048          default: break;
6049       }
6050 
6051       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6052          If we see it, set nRegs to 0 so as to cause the next conditional
6053          to fail. */
6054       if (!isPX && mm != 0)
6055          nRegs = 0;
6056 
6057       if (nRegs >= 2 && nRegs <= 4) {
6058 
6059          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6060 
6061          /* Generate the transfer address (TA) and if necessary the
6062             writeback address (WB) */
6063          IRTemp tTA = newTemp(Ity_I64);
6064          assign(tTA, getIReg64orSP(nn));
6065          if (nn == 31) { /* FIXME generate stack alignment check */ }
6066          IRTemp tWB = IRTemp_INVALID;
6067          if (isPX) {
6068             tWB = newTemp(Ity_I64);
6069             assign(tWB, binop(Iop_Add64,
6070                               mkexpr(tTA),
6071                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6072                                                      : getIReg64orZR(mm)));
6073          }
6074 
6075          /* -- BEGIN generate the transfers -- */
6076 
6077          IRTemp u0, u1, u2, u3;
6078          u0 = u1 = u2 = u3 = IRTemp_INVALID;
6079          switch (nRegs) {
6080             case 4: u3 = newTempV128(); /* fallthru */
6081             case 3: u2 = newTempV128(); /* fallthru */
6082             case 2: u1 = newTempV128();
6083                     u0 = newTempV128(); break;
6084             default: vassert(0);
6085          }
6086 
6087          /* -- Multiple 128 or 64 bit stores -- */
6088          if (!isLD) {
6089             switch (nRegs) {
6090                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6091                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6092                case 2: assign(u1, getQReg128((tt+1) % 32));
6093                        assign(u0, getQReg128((tt+0) % 32)); break;
6094                default: vassert(0);
6095             }
6096 #           define MAYBE_NARROW_TO_64(_expr) \
6097                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6098             UInt step = isQ ? 16 : 8;
6099             switch (nRegs) {
6100                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6101                                  MAYBE_NARROW_TO_64(mkexpr(u3)) );
6102                         /* fallthru */
6103                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6104                                  MAYBE_NARROW_TO_64(mkexpr(u2)) );
6105                         /* fallthru */
6106                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6107                                  MAYBE_NARROW_TO_64(mkexpr(u1)) );
6108                         storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6109                                  MAYBE_NARROW_TO_64(mkexpr(u0)) );
6110                         break;
6111                default: vassert(0);
6112             }
6113 #           undef MAYBE_NARROW_TO_64
6114          }
6115 
6116          /* -- Multiple 128 or 64 bit loads -- */
6117          else /* isLD */ {
6118             UInt   step   = isQ ? 16 : 8;
6119             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6120 #           define MAYBE_WIDEN_FROM_64(_expr) \
6121                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6122             switch (nRegs) {
6123                case 4:
6124                   assign(u3, MAYBE_WIDEN_FROM_64(
6125                                 loadLE(loadTy,
6126                                        binop(Iop_Add64, mkexpr(tTA),
6127                                                         mkU64(3 * step)))));
6128                   /* fallthru */
6129                case 3:
6130                   assign(u2, MAYBE_WIDEN_FROM_64(
6131                                 loadLE(loadTy,
6132                                        binop(Iop_Add64, mkexpr(tTA),
6133                                                         mkU64(2 * step)))));
6134                   /* fallthru */
6135                case 2:
6136                   assign(u1, MAYBE_WIDEN_FROM_64(
6137                                 loadLE(loadTy,
6138                                        binop(Iop_Add64, mkexpr(tTA),
6139                                                         mkU64(1 * step)))));
6140                   assign(u0, MAYBE_WIDEN_FROM_64(
6141                                 loadLE(loadTy,
6142                                        binop(Iop_Add64, mkexpr(tTA),
6143                                                         mkU64(0 * step)))));
6144                   break;
6145                default:
6146                   vassert(0);
6147             }
6148 #           undef MAYBE_WIDEN_FROM_64
6149             switch (nRegs) {
6150                case 4:  putQReg128( (tt+3) % 32,
6151                                     math_MAYBE_ZERO_HI64(bitQ, u3));
6152                         /* fallthru */
6153                case 3:  putQReg128( (tt+2) % 32,
6154                                     math_MAYBE_ZERO_HI64(bitQ, u2));
6155                         /* fallthru */
6156                case 2:  putQReg128( (tt+1) % 32,
6157                                     math_MAYBE_ZERO_HI64(bitQ, u1));
6158                         putQReg128( (tt+0) % 32,
6159                                     math_MAYBE_ZERO_HI64(bitQ, u0));
6160                         break;
6161                default: vassert(0);
6162             }
6163          }
6164 
6165          /* -- END generate the transfers -- */
6166 
6167          /* Do the writeback, if necessary */
6168          if (isPX) {
6169             putIReg64orSP(nn, mkexpr(tWB));
6170          }
6171 
6172          HChar pxStr[20];
6173          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6174          if (isPX) {
6175             if (mm == BITS5(1,1,1,1,1))
6176                vex_sprintf(pxStr, ", #%u", xferSzB);
6177             else
6178                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6179          }
6180          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6181          DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6182              isLD ? "ld" : "st",
6183              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6184              pxStr);
6185 
6186          return True;
6187       }
6188       /* else fall through */
6189    }
6190 
6191    /* ---------- LD1R (single structure, replicate) ---------- */
6192    /* ---------- LD2R (single structure, replicate) ---------- */
6193    /* ---------- LD3R (single structure, replicate) ---------- */
6194    /* ---------- LD4R (single structure, replicate) ---------- */
6195    /* 31 29       22 20    15    11 9 4
6196       0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
6197       0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step
6198 
6199       0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
6200       0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step
6201 
6202       0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
6203       0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step
6204 
6205       0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
6206       0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step
6207 
6208       step = if m == 11111 then transfer-size else Xm
6209    */
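   /* Replication example (operands assumed): "ld1r {v0.4s}, [x0]"
      loads one 32-bit element and copies it into all four lanes of
      v0.  LD2R/LD3R/LD4R do likewise with 2/3/4 consecutive elements
      into 2/3/4 consecutive registers, which is what the per-register
      math_DUP_TO_V128 calls below implement. */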
6210    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6211        && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6212        && INSN(12,12) == 0) {
6213       UInt   bitQ  = INSN(30,30);
6214       Bool   isPX  = INSN(23,23) == 1;
6215       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6216       UInt   mm    = INSN(20,16);
6217       UInt   sz    = INSN(11,10);
6218       UInt   nn    = INSN(9,5);
6219       UInt   tt    = INSN(4,0);
6220 
6221       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6222       if (isPX || mm == 0) {
6223 
6224          IRType ty    = integerIRTypeOfSize(1 << sz);
6225 
6226          UInt laneSzB = 1 << sz;
6227          UInt xferSzB = laneSzB * nRegs;
6228 
6229          /* Generate the transfer address (TA) and if necessary the
6230             writeback address (WB) */
6231          IRTemp tTA = newTemp(Ity_I64);
6232          assign(tTA, getIReg64orSP(nn));
6233          if (nn == 31) { /* FIXME generate stack alignment check */ }
6234          IRTemp tWB = IRTemp_INVALID;
6235          if (isPX) {
6236             tWB = newTemp(Ity_I64);
6237             assign(tWB, binop(Iop_Add64,
6238                               mkexpr(tTA),
6239                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6240                                                      : getIReg64orZR(mm)));
6241          }
6242 
6243          /* Do the writeback, if necessary */
6244          if (isPX) {
6245             putIReg64orSP(nn, mkexpr(tWB));
6246          }
6247 
6248          IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6249          e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6250          switch (nRegs) {
6251             case 4:
6252                e3 = newTemp(ty);
6253                assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6254                                                       mkU64(3 * laneSzB))));
6255                v3 = math_DUP_TO_V128(e3, ty);
6256                putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6257                /* fallthrough */
6258             case 3:
6259                e2 = newTemp(ty);
6260                assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6261                                                       mkU64(2 * laneSzB))));
6262                v2 = math_DUP_TO_V128(e2, ty);
6263                putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6264                /* fallthrough */
6265             case 2:
6266                e1 = newTemp(ty);
6267                assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6268                                                       mkU64(1 * laneSzB))));
6269                v1 = math_DUP_TO_V128(e1, ty);
6270                putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6271                /* fallthrough */
6272             case 1:
6273                e0 = newTemp(ty);
6274                assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6275                                                       mkU64(0 * laneSzB))));
6276                v0 = math_DUP_TO_V128(e0, ty);
6277                putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6278                break;
6279             default:
6280                vassert(0);
6281          }
6282 
6283          HChar pxStr[20];
6284          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6285          if (isPX) {
6286             if (mm == BITS5(1,1,1,1,1))
6287                vex_sprintf(pxStr, ", #%u", xferSzB);
6288             else
6289                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6290          }
6291          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6292          DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6293              nRegs,
6294              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6295              pxStr);
6296 
6297          return True;
6298       }
6299       /* else fall through */
6300    }
6301 
6302    /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6303    /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6304    /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6305    /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6306    /* 31 29       22 21 20    15    11 9 4
6307       0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
6308       0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step
6309 
6310       0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
6311       0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step
6312 
6313       0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
6314       0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step
6315 
6316       0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
6317       0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step
6318 
6319       step = if m == 11111 then transfer-size else Xm
6320       op   = case L of 1 -> LD ; 0 -> ST
6321 
6322       laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6323                                      01:b:b:b0 -> 2, bbb
6324                                      10:b:b:00 -> 4, bb
6325                                      10:b:0:01 -> 8, b
6326    */
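   /* Worked decode (field values assumed): for
      "ld2 {v0.s, v1.s}[3], [x0]" we have xx=10, q=1, S=1, sz=00,
      hence xx_q_S_sz = 0x2C, giving laneSzB = 4 and
      ix = (0x2C >> 2) & 3 = 3, i.e. lane 3 of a 32-bit-lane vector. */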
6327    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6328       UInt   bitQ  = INSN(30,30);
6329       Bool   isPX  = INSN(23,23) == 1;
6330       Bool   isLD  = INSN(22,22) == 1;
6331       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6332       UInt   mm    = INSN(20,16);
6333       UInt   xx    = INSN(15,14);
6334       UInt   bitS  = INSN(12,12);
6335       UInt   sz    = INSN(11,10);
6336       UInt   nn    = INSN(9,5);
6337       UInt   tt    = INSN(4,0);
6338 
6339       Bool valid = True;
6340 
6341       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6342       if (!isPX && mm != 0)
6343          valid = False;
6344 
6345       UInt laneSzB = 0;  /* invalid */
6346       UInt ix      = 16; /* invalid */
6347 
6348       UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6349       switch (xx_q_S_sz) {
6350          case 0x00: case 0x01: case 0x02: case 0x03:
6351          case 0x04: case 0x05: case 0x06: case 0x07:
6352          case 0x08: case 0x09: case 0x0A: case 0x0B:
6353          case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6354             laneSzB = 1; ix = xx_q_S_sz & 0xF;
6355             break;
6356          case 0x10: case 0x12: case 0x14: case 0x16:
6357          case 0x18: case 0x1A: case 0x1C: case 0x1E:
6358             laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6359             break;
6360          case 0x20: case 0x24: case 0x28: case 0x2C:
6361             laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6362             break;
6363          case 0x21: case 0x29:
6364             laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6365             break;
6366          default:
6367             break;
6368       }
6369 
6370       if (valid && laneSzB != 0) {
6371 
6372          IRType ty      = integerIRTypeOfSize(laneSzB);
6373          UInt   xferSzB = laneSzB * nRegs;
6374 
6375          /* Generate the transfer address (TA) and if necessary the
6376             writeback address (WB) */
6377          IRTemp tTA = newTemp(Ity_I64);
6378          assign(tTA, getIReg64orSP(nn));
6379          if (nn == 31) { /* FIXME generate stack alignment check */ }
6380          IRTemp tWB = IRTemp_INVALID;
6381          if (isPX) {
6382             tWB = newTemp(Ity_I64);
6383             assign(tWB, binop(Iop_Add64,
6384                               mkexpr(tTA),
6385                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6386                                                      : getIReg64orZR(mm)));
6387          }
6388 
6389          /* Do the writeback, if necessary */
6390          if (isPX) {
6391             putIReg64orSP(nn, mkexpr(tWB));
6392          }
6393 
6394          switch (nRegs) {
6395             case 4: {
6396                IRExpr* addr
6397                   = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6398                if (isLD) {
6399                   putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6400                } else {
6401                   storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6402                }
6403                /* fallthrough */
6404             }
6405             case 3: {
6406                IRExpr* addr
6407                   = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6408                if (isLD) {
6409                   putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6410                } else {
6411                   storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6412                }
6413                /* fallthrough */
6414             }
6415             case 2: {
6416                IRExpr* addr
6417                   = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6418                if (isLD) {
6419                   putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6420                } else {
6421                   storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6422                }
6423                /* fallthrough */
6424             }
6425             case 1: {
6426                IRExpr* addr
6427                   = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6428                if (isLD) {
6429                   putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6430                } else {
6431                   storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6432                }
6433                break;
6434             }
6435             default:
6436                vassert(0);
6437          }
6438 
6439          HChar pxStr[20];
6440          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6441          if (isPX) {
6442             if (mm == BITS5(1,1,1,1,1))
6443                vex_sprintf(pxStr, ", #%u", xferSzB);
6444             else
6445                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6446          }
6447          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6448          DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6449              isLD ? "ld" : "st", nRegs,
6450              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6451              ix, nameIReg64orSP(nn), pxStr);
6452 
6453          return True;
6454       }
6455       /* else fall through */
6456    }
6457 
6458    /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6459    /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6460    /* 31 29     23  20      14    9 4
6461       sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
6462       sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
6463       sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
6464       sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6465    */
6466    /* For the "standard" implementation we pass through the LL and SC to
6467       the host.  For the "fallback" implementation, for details see
6468         https://bugs.kde.org/show_bug.cgi?id=344524 and
6469         https://bugs.kde.org/show_bug.cgi?id=369459,
6470       but in short:
6471 
6472       LoadLinked(addr)
6473         gs.LLsize = load_size // 1, 2, 4 or 8
6474         gs.LLaddr = addr
6475         gs.LLdata = zeroExtend(*addr)
6476 
6477       StoreCond(addr, data)
6478         tmp_LLsize = gs.LLsize
6479         gs.LLsize = 0 // "no transaction"
6480         if tmp_LLsize != store_size        -> fail
6481         if addr != gs.LLaddr               -> fail
6482         if zeroExtend(*addr) != gs.LLdata  -> fail
6483         cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6484         if !cas_ok                         -> fail
6485         succeed
6486 
6487       When thread scheduled
6488         gs.LLsize = 0 // "no transaction"
6489         (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6490          has to do this bit)
6491    */
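   /* For context, the guest code this must support is typically an
      LL/SC retry loop such as this (a standard atomic increment,
      shown only as illustration):

         retry:
            ldxr  x1, [x0]
            add   x1, x1, #1
            stxr  w2, x1, [x0]
            cbnz  w2, retry

      Under the fallback scheme, LDXR records (size, addr, data) in
      the guest state, and STXR re-checks all three before attempting
      the CAS; any mismatch makes the STXR report failure (Ws != 0),
      causing the loop to retry. */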
6492    if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6493        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6494        && INSN(14,10) == BITS5(1,1,1,1,1)) {
6495       UInt szBlg2     = INSN(31,30);
6496       Bool isLD       = INSN(22,22) == 1;
6497       Bool isAcqOrRel = INSN(15,15) == 1;
6498       UInt ss         = INSN(20,16);
6499       UInt nn         = INSN(9,5);
6500       UInt tt         = INSN(4,0);
6501 
6502       vassert(szBlg2 < 4);
6503       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6504       IRType ty  = integerIRTypeOfSize(szB);
6505       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6506 
6507       IRTemp ea = newTemp(Ity_I64);
6508       assign(ea, getIReg64orSP(nn));
6509       /* FIXME generate check that ea is szB-aligned */
6510 
6511       if (isLD && ss == BITS5(1,1,1,1,1)) {
6512          IRTemp res = newTemp(ty);
6513          if (abiinfo->guest__use_fallback_LLSC) {
6514             // Do the load first so we don't update any guest state
6515             // if it faults.
6516             IRTemp loaded_data64 = newTemp(Ity_I64);
6517             assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6518             stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6519             stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6520             stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6521             putIReg64orZR(tt, mkexpr(loaded_data64));
6522          } else {
6523             stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6524             putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6525          }
6526          if (isAcqOrRel) {
6527             stmt(IRStmt_MBE(Imbe_Fence));
6528          }
6529          DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6530              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6531              abiinfo->guest__use_fallback_LLSC
6532                 ? "(fallback implementation)" : "");
6533          return True;
6534       }
6535       if (!isLD) {
6536          if (isAcqOrRel) {
6537             stmt(IRStmt_MBE(Imbe_Fence));
6538          }
6539          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6540          if (abiinfo->guest__use_fallback_LLSC) {
6541             // This is really ugly, since we don't have any way to do
6542             // proper if-then-else.  First, set up as if the SC failed,
6543             // and jump forwards if it really has failed.
6544 
6545             // Continuation address
6546             IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6547 
6548             // "the SC failed".  Any non-zero value means failure.
6549             putIReg64orZR(ss, mkU64(1));
6550 
6551             IRTemp tmp_LLsize = newTemp(Ity_I64);
6552             assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6553             stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6554             ));
6555             // Fail if no or wrong-size transaction
6556             vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6557             stmt( IRStmt_Exit(
6558                      binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6559                      Ijk_Boring, nia, OFFB_PC
6560             ));
6561             // Fail if the address doesn't match the LL address
6562             stmt( IRStmt_Exit(
6563                       binop(Iop_CmpNE64, mkexpr(ea),
6564                                          IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6565                       Ijk_Boring, nia, OFFB_PC
6566             ));
6567             // Fail if the data doesn't match the LL data
6568             IRTemp llsc_data64 = newTemp(Ity_I64);
6569             assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6570             stmt( IRStmt_Exit(
6571                       binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6572                                          mkexpr(llsc_data64)),
6573                       Ijk_Boring, nia, OFFB_PC
6574             ));
6575             // Try to CAS the new value in.
6576             IRTemp old = newTemp(ty);
6577             IRTemp expd = newTemp(ty);
6578             assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6579             stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6580                                      Iend_LE, mkexpr(ea),
6581                                      /*expdHi*/NULL, mkexpr(expd),
6582                                      /*dataHi*/NULL, data
6583             )));
6584             // Fail if the CAS failed (viz, old != expd)
6585             stmt( IRStmt_Exit(
6586                       binop(Iop_CmpNE64,
6587                             widenUto64(ty, mkexpr(old)),
6588                             widenUto64(ty, mkexpr(expd))),
6589                       Ijk_Boring, nia, OFFB_PC
6590             ));
6591             // Otherwise we succeeded (!)
6592             putIReg64orZR(ss, mkU64(0));
6593          } else {
6594             IRTemp res = newTemp(Ity_I1);
6595             stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6596             /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6597                Need to set rS to 1 on failure, 0 on success. */
6598             putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6599                                                mkU64(1)));
6600          }
6601          DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6602              nameIRegOrZR(False, ss),
6603              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6604              abiinfo->guest__use_fallback_LLSC
6605                 ? "(fallback implementation)" : "");
6606          return True;
6607       }
6608       /* else fall through */
6609    }
6610 
6611    /* ------------------ LDA{R,RH,RB} ------------------ */
6612    /* ------------------ STL{R,RH,RB} ------------------ */
6613    /* 31 29     23  20      14    9 4
6614       sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
6615       sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
6616    */
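   /* Ordering note: acquire (LDAR) is modelled below as
      load-then-fence, and release (STLR) as fence-then-store, so the
      Imbe_Fence sits on the side of the access that other accesses
      must not be reordered across.  This is a conservative rendering
      of the architectural acquire/release semantics. */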
6617    if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6618        && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6619       UInt szBlg2 = INSN(31,30);
6620       Bool isLD   = INSN(22,22) == 1;
6621       UInt nn     = INSN(9,5);
6622       UInt tt     = INSN(4,0);
6623 
6624       vassert(szBlg2 < 4);
6625       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6626       IRType ty  = integerIRTypeOfSize(szB);
6627       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6628 
6629       IRTemp ea = newTemp(Ity_I64);
6630       assign(ea, getIReg64orSP(nn));
6631       /* FIXME generate check that ea is szB-aligned */
6632 
6633       if (isLD) {
6634          IRTemp res = newTemp(ty);
6635          assign(res, loadLE(ty, mkexpr(ea)));
6636          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6637          stmt(IRStmt_MBE(Imbe_Fence));
6638          DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6639              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6640       } else {
6641          stmt(IRStmt_MBE(Imbe_Fence));
6642          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6643          storeLE(mkexpr(ea), data);
6644          DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6645              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6646       }
6647       return True;
6648    }
6649 
6650    /* The PRFM cases that follow possibly allow Rt values (the
6651       prefetch operation) which are not allowed by the documentation.
6652       This should be looked into. */
6653    /* ------------------ PRFM (immediate) ------------------ */
6654    /* 31           21    9 4
6655       11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
6656    */
6657    if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6658       UInt imm12 = INSN(21,10);
6659       UInt nn    = INSN(9,5);
6660       UInt tt    = INSN(4,0);
6661       /* Generating any IR here is pointless, except for documentation
6662          purposes, as it will get optimised away later. */
6663       IRTemp ea = newTemp(Ity_I64);
6664       assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6665       DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6666       return True;
6667    }
6668 
6669    /* ------------------ PRFM (register) ------------------ */
6670    /* 31 29      22 20 15  12 11 9  4
6671       11 1110001 01 Rm opt S  10 Rn Rt    PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6672    */
6673    if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6674        && INSN(11,10) == BITS2(1,0)) {
6675       HChar  dis_buf[64];
6676       UInt   tt = INSN(4,0);
6677       IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6678       if (ea != IRTemp_INVALID) {
6679          /* No actual code to generate. */
6680          DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6681          return True;
6682       }
6683    }
6684 
6685    /* ------------------ PRFUM (unscaled offset) ------------------ */
6686    /* 31 29      22 20   11 9  4
6687       11 1110001 00 imm9 00 Rn Rt    PRFUM prfop=Rt, [Xn|SP, #simm]
6688    */
6689    if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6690        && INSN(11,10) == BITS2(0,0)) {
6691       ULong  imm9   = INSN(20,12);
6692       UInt   nn     = INSN(9,5);
6693       UInt   tt     = INSN(4,0);
6694       ULong  offset = sx_to_64(imm9, 9);
6695       IRTemp ea     = newTemp(Ity_I64);
6696       assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6697       /* No actual code to generate. */
6698       DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6699       return True;
6700    }
6701 
6702    vex_printf("ARM64 front end: load_store\n");
6703    return False;
6704 #  undef INSN
6705 }
6706 
6707 
6708 /*------------------------------------------------------------*/
6709 /*--- Control flow and misc instructions                   ---*/
6710 /*------------------------------------------------------------*/
6711 
6712 static
6713 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6714                           const VexArchInfo* archinfo,
6715                           const VexAbiInfo* abiinfo)
6716 {
6717 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
6718 
6719    /* ---------------------- B cond ----------------------- */
6720    /* 31        24    4 3
6721       0101010 0 imm19 0 cond */
6722    if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6723       UInt  cond   = INSN(3,0);
6724       ULong uimm64 = INSN(23,5) << 2;
6725       Long  simm64 = (Long)sx_to_64(uimm64, 21);
6726       vassert(dres->whatNext    == Dis_Continue);
6727       vassert(dres->len         == 4);
6728       vassert(dres->continueAt  == 0);
6729       vassert(dres->jk_StopHere == Ijk_INVALID);
6730       stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6731                         Ijk_Boring,
6732                         IRConst_U64(guest_PC_curr_instr + simm64),
6733                         OFFB_PC) );
6734       putPC(mkU64(guest_PC_curr_instr + 4));
6735       dres->whatNext    = Dis_StopHere;
6736       dres->jk_StopHere = Ijk_Boring;
6737       DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6738       return True;
6739    }
6740 
6741    /* -------------------- B{L} uncond -------------------- */
6742    if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6743       /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
6744          100101 imm26  BL (PC + sxTo64(imm26 << 2))
6745       */
6746       UInt  bLink  = INSN(31,31);
6747       ULong uimm64 = INSN(25,0) << 2;
6748       Long  simm64 = (Long)sx_to_64(uimm64, 28);
6749       if (bLink) {
6750          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6751       }
6752       putPC(mkU64(guest_PC_curr_instr + simm64));
6753       dres->whatNext = Dis_StopHere;
6754       dres->jk_StopHere = Ijk_Call;
6755       DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6756                           guest_PC_curr_instr + simm64);
6757       return True;
6758    }
6759 
6760    /* --------------------- B{L} reg --------------------- */
6761    /* 31      24 22 20    15     9  4
6762       1101011 00 10 11111 000000 nn 00000  RET  Rn
6763       1101011 00 01 11111 000000 nn 00000  CALL Rn
6764       1101011 00 00 11111 000000 nn 00000  JMP  Rn
6765    */
6766    if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6767        && INSN(20,16) == BITS5(1,1,1,1,1)
6768        && INSN(15,10) == BITS6(0,0,0,0,0,0)
6769        && INSN(4,0) == BITS5(0,0,0,0,0)) {
6770       UInt branch_type = INSN(22,21);
6771       UInt nn          = INSN(9,5);
6772       if (branch_type == BITS2(1,0) /* RET */) {
6773          putPC(getIReg64orZR(nn));
6774          dres->whatNext = Dis_StopHere;
6775          dres->jk_StopHere = Ijk_Ret;
6776          DIP("ret %s\n", nameIReg64orZR(nn));
6777          return True;
6778       }
6779       if (branch_type == BITS2(0,1) /* CALL */) {
6780          IRTemp dst = newTemp(Ity_I64);
6781          assign(dst, getIReg64orZR(nn));
6782          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6783          putPC(mkexpr(dst));
6784          dres->whatNext = Dis_StopHere;
6785          dres->jk_StopHere = Ijk_Call;
6786          DIP("blr %s\n", nameIReg64orZR(nn));
6787          return True;
6788       }
6789       if (branch_type == BITS2(0,0) /* JMP */) {
6790          putPC(getIReg64orZR(nn));
6791          dres->whatNext = Dis_StopHere;
6792          dres->jk_StopHere = Ijk_Boring;
6793          DIP("jmp %s\n", nameIReg64orZR(nn));
6794          return True;
6795       }
6796    }
6797 
6798    /* -------------------- CB{N}Z -------------------- */
6799    /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6800       sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
6801    */
6802    if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6803       Bool    is64   = INSN(31,31) == 1;
6804       Bool    bIfZ   = INSN(24,24) == 0;
6805       ULong   uimm64 = INSN(23,5) << 2;
6806       UInt    rT     = INSN(4,0);
6807       Long    simm64 = (Long)sx_to_64(uimm64, 21);
6808       IRExpr* cond   = NULL;
6809       if (is64) {
6810          cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6811                       getIReg64orZR(rT), mkU64(0));
6812       } else {
6813          cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6814                       getIReg32orZR(rT), mkU32(0));
6815       }
6816       stmt( IRStmt_Exit(cond,
6817                         Ijk_Boring,
6818                         IRConst_U64(guest_PC_curr_instr + simm64),
6819                         OFFB_PC) );
6820       putPC(mkU64(guest_PC_curr_instr + 4));
6821       dres->whatNext    = Dis_StopHere;
6822       dres->jk_StopHere = Ijk_Boring;
6823       DIP("cb%sz %s, 0x%llx\n",
6824           bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6825           guest_PC_curr_instr + simm64);
6826       return True;
6827    }
6828 
6829    /* -------------------- TB{N}Z -------------------- */
6830    /* 31 30      24 23  18  5 4
6831       b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6832       b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6833    */
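   /* Bit-number example (operands assumed): "tbz x0, #33, label"
      encodes bit 33 as b5:b40 = 1:00001; below, bitNo = (1 << 5) | 1
      = 33, and the exit is taken when (x0 >> 33) & 1 is zero. */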
6834    if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6835       UInt    b5     = INSN(31,31);
6836       Bool    bIfZ   = INSN(24,24) == 0;
6837       UInt    b40    = INSN(23,19);
6838       UInt    imm14  = INSN(18,5);
6839       UInt    tt     = INSN(4,0);
6840       UInt    bitNo  = (b5 << 5) | b40;
6841       ULong   uimm64 = imm14 << 2;
6842       Long    simm64 = sx_to_64(uimm64, 16);
6843       IRExpr* cond
6844          = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6845                  binop(Iop_And64,
6846                        binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6847                        mkU64(1)),
6848                  mkU64(0));
6849       stmt( IRStmt_Exit(cond,
6850                         Ijk_Boring,
6851                         IRConst_U64(guest_PC_curr_instr + simm64),
6852                         OFFB_PC) );
6853       putPC(mkU64(guest_PC_curr_instr + 4));
6854       dres->whatNext    = Dis_StopHere;
6855       dres->jk_StopHere = Ijk_Boring;
6856       DIP("tb%sz %s, #%u, 0x%llx\n",
6857           bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6858           guest_PC_curr_instr + simm64);
6859       return True;
6860    }
6861 
6862    /* -------------------- SVC -------------------- */
6863    /* 11010100 000 imm16 000 01
6864       Don't bother with anything except the imm16==0 case.
6865    */
6866    if (INSN(31,0) == 0xD4000001) {
6867       putPC(mkU64(guest_PC_curr_instr + 4));
6868       dres->whatNext    = Dis_StopHere;
6869       dres->jk_StopHere = Ijk_Sys_syscall;
6870       DIP("svc #0\n");
6871       return True;
6872    }
6873 
6874    /* ------------------ M{SR,RS} ------------------ */
6875    /* ---- Cases for TPIDR_EL0 ----
6876       0xD51BD0 010 Rt   MSR tpidr_el0, rT
6877       0xD53BD0 010 Rt   MRS rT, tpidr_el0
6878    */
6879    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6880        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6881       Bool toSys = INSN(21,21) == 0;
6882       UInt tt    = INSN(4,0);
6883       if (toSys) {
6884          stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6885          DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6886       } else {
6887          putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6888          DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6889       }
6890       return True;
6891    }
6892    /* ---- Cases for FPCR ----
6893       0xD51B44 000 Rt  MSR fpcr, rT
6894       0xD53B44 000 Rt  MRS rT, fpcr
6895    */
6896    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6897        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6898       Bool toSys = INSN(21,21) == 0;
6899       UInt tt    = INSN(4,0);
6900       if (toSys) {
6901          stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6902          DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6903       } else {
6904          putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6905          DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6906       }
6907       return True;
6908    }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
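   /* Illustrative consequence of the QC-only FPSR modelling above
      (an example, not a full architectural statement): "msr fpsr, xT"
      with bit 27 of xT set makes QCFLAG nonzero, so a later
      "mrs xT, fpsr" reads back exactly (1 << 27); every other FPSR
      bit reads as zero. */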
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool  toSys = INSN(21,21) == 0;
      UInt  tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
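   /* Worked example for the CTR_EL0 value above (a sketch, assuming
      64-byte D and I lines, ie both _lg2_szB fields equal to 6): the
      line-size fields are encoded in 4-byte units, so both become
      6 - 2 = 4, giving val = 0x8440c000 | (4 << 16) | 4
      = 0x8444C004. */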
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CNTFRQ_EL0 ----
      This is always RO at EL0, so it's safe to pass through to the host.
      D5 3B E0 000 Rt  MRS Xt, cntfrq_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
                         &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }
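
   /* Worked example of the rounding above (illustrative only): with
      iMinLine_lg2_szB == 6 (64-byte lines) and xT == 0x1234567B, the
      invalidation range becomes [0x12345640, 0x12345640 + 64). */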

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   /* 31          21            11  7 6  4
      11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
      11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
      11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   */
   if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
       && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
       && INSN(7,7) == 1
       && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt opc = INSN(6,5);
      UInt CRm = INSN(11,8);
      vassert(opc <= 2 && CRm <= 15);
      stmt(IRStmt_MBE(Imbe_Fence));
      const HChar* opNames[3]
         = { "dsb", "dmb", "isb" };
      const HChar* howNames[16]
         = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
             "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
      DIP("%s %s\n", opNames[opc], howNames[CRm]);
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   /* -------------------- BRK -------------------- */
   /* 31        23  20    4
      1101 0100 001 imm16 00000  BRK #imm16
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
       && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt imm16 = INSN(20,5);
      /* Request SIGTRAP and then restart of this insn. */
      putPC(mkU64(guest_PC_curr_instr + 0));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_SigTRAP;
      DIP("brk #%u\n", imm16);
      return True;
   }

   /* ------------------- YIELD ------------------- */
   /* 31        23        15        7
      1101 0101 0000 0011 0010 0000 0011 1111
   */
   if (INSN(31,0) == 0xD503203F) {
      /* Request yield followed by continuation at the next insn. */
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Yield;
      DIP("yield\n");
      return True;
   }

   /* -------------------- HINT ------------------- */
   /* 31        23        15   11   4 3
      1101 0101 0000 0011 0010 imm7 1 1111
      Catch otherwise unhandled HINT instructions - any
      like YIELD which are explicitly handled should go
      above this case.
   */
   if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
       && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
       && INSN(15,12) == BITS4(0,0,1,0)
       && INSN(4,0) == BITS5(1,1,1,1,1)) {
      UInt imm7 = INSN(11,5);
      DIP("hint #%u\n", imm7);
      return True;
   }

   /* ------------------- CLREX ------------------ */
   /* 31        23        15   11 7
      1101 0101 0000 0011 0011 m  0101 1111  CLREX CRm
      CRm is apparently ignored.
   */
   if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
      UInt mm = INSN(11,8);
      /* AFAICS, this simply cancels a (all?) reservations made by a
         (any?) preceding LDREX(es).  Arrange to hand it through to
         the back end. */
      if (abiinfo->guest__use_fallback_LLSC) {
         stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
      } else {
         stmt( IRStmt_MBE(Imbe_CancelReservation) );
      }
      DIP("clrex #%u\n", mm);
      return True;
   }

   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
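
/* Sanity-check example for VFPExpandImm (easily verified by hand):
   for N == 32 we have E == 6 and F == 25, so imm8 == 0x70 gives
   sign == 0, exp == 0b011111 and frac == 0b110000 << 19, that is,
   0x3F800000 -- the IEEE754 encoding of 1.0f, matching the imm8
   value that "fmov s0, #1.0" uses. */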

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
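
/* Worked examples for AdvSIMDExpandImm (illustrative): op=0,
   cmode=0b0010, imm8=0xAB gives Replicate32x2(0xAB << 8)
   = 0x0000AB000000AB00, whilst the same cmode with imm8 == 0 fails,
   since testimm8 is True there.  op=0, cmode=0b1110, imm8=0xAB gives
   Replicate8x8(0xAB) = 0xABABABABABABABAB. */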

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}

/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
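
/* Worked example for getLaneInfo_IMMH_IMMB (illustrative):
   immh=0b0010, immb=0b101 gives immhb = 0b0010101 = 21; the
   "immh & 2" case fires, so the lanes are 16 bits wide (szBlg2 == 1)
   and the shift amount is 32 - 21 = 11. */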

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Max32Fx4: case Iop_Min32Fx4:
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}
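
/* Worked example for math_TBL_TBX (illustrative): with len == 1 (two
   table vectors) and a src index byte of 17, the tabent == 1
   iteration biases it down to 1, the valid_mask test (1 < 16)
   passes, and byte 1 of tab[1] is selected.  A src index byte of 40
   is >= 32, so every per-table mask is zero and that lane comes from
   |oor_values| instead (all zeroes in the TBL case). */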


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}


/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
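
/* Worked example for math_ABD (illustrative): in an unsigned byte
   lane with argL == 3 and argR == 10, msk is zero (3 > 10 is false),
   so the first AndV128 arm contributes zero and the lane result is
   (10 - 3) & 0xFF = 7, as required for |3 - 10|. */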


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
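
/* Example for math_WIDEN_EVEN_OR_ODD_LANES (illustrative): when
   widening the even 16-bit lanes to 32 bits, each 32-bit lane
   already holds its even source lane in bits 15:0, so shifting left
   by 16 and then right by 16 (arithmetically to sign-widen,
   logically to zero-widen) leaves the widened value in place.  The
   odd lanes already sit in bits 31:16, so they need only the right
   shift. */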


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
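
/* Worked example for math_DUP_VEC_ELEM (illustrative): size == 1
   (16-bit lanes) and laneNo == 5 give ix == 0b1010, so the loop
   applies InterleaveHI64x2 (clone the upper half), then
   CatEvenLanes32x4, then CatOddLanes16x8, leaving h[5] duplicated in
   all eight lanes. */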


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}


/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero the upper half, and replace
         it with |new64|. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                                       mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                                    mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
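
/* Example for math_SQABS (illustrative, 8-bit lanes): for a lane
   holding -128, nsub wraps back to -128 (0 - (-128) overflows), so
   *nabs is -128, whereas the saturating qsub gives +127 in *qabs.
   A caller can detect the saturation (for FPSR.QC) by comparing the
   two results. */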


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}
8125 
8126 
8127 /* Generate IR to compute vector widening MULL from either the lower
8128    (is2==False) or upper (is2==True) halves of vecN and vecM.  The
8129    widening multiplies are unsigned when isU==True and signed when
8130    isU==False.  |size| is the narrow lane size indication.  Optionally,
8131    the product may be added to or subtracted from vecD, at the wide lane
8132    size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
8133    is 'm' (only multiply) then the accumulate part does not happen, and
8134    |vecD| is expected to == IRTemp_INVALID.
8135 
8136    Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8137    are allowed.  The result is returned in a new IRTemp, which is
8138    returned in *res. */
8139 static
math_MULL_ACC(IRTemp * res,Bool is2,Bool isU,UInt size,HChar mas,IRTemp vecN,IRTemp vecM,IRTemp vecD)8140 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8141                      Bool is2, Bool isU, UInt size, HChar mas,
8142                      IRTemp vecN, IRTemp vecM, IRTemp vecD )
8143 {
8144    vassert(res && *res == IRTemp_INVALID);
8145    vassert(size <= 2);
8146    vassert(mas == 'm' || mas == 'a' || mas == 's');
8147    if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8148    IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8149    IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
8150                   : (mas == 's' ? mkVecSUB(size+1)
8151                   : Iop_INVALID);
8152    IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
8153                                             mkexpr(vecN), mkexpr(vecM));
8154    *res = newTempV128();
8155    assign(*res, mas == 'm' ? mkexpr(mul)
8156                            : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8157 }
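
/* Mapping sketch (illustrative only): UMLAL2 Vd.4s, Vn.8h, Vm.8h
   corresponds to is2 == True (upper halves), isU == True, size == X01
   (16-bit narrow lanes) and mas == 'a': the upper four H lanes of
   vecN and vecM are multiplied into four unsigned 32-bit products,
   which are then added into vecD with a 32x4 add.  Plain UMULL
   passes mas == 'm' and vecD == IRTemp_INVALID, and the accumulate
   step drops away. */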
8158 
8159 
8160 /* Same as math_MULL_ACC, except the multiply is signed widening
8161    and the product is then doubled before being added to or
8162    subtracted from the accumulated value.  And everything is
8163    saturated.  In all cases, saturation residuals are returned
8164    via (sat1q, sat1n), and in the accumulate cases,
8165    via (sat2q, sat2n) too.  All results are returned in new temporaries.
8166    In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8167    so the caller can tell this has happened. */
8168 static
8169 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8170                         /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8171                         /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8172                         Bool is2, UInt size, HChar mas,
8173                         IRTemp vecN, IRTemp vecM, IRTemp vecD )
8174 {
8175    vassert(size <= 2);
8176    vassert(mas == 'm' || mas == 'a' || mas == 's');
8177    /* Compute
8178          sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
8179          sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
8180       IOW take either the low or high halves of vecN and vecM, signed widen,
8181       multiply, double that, and signedly saturate.  Also compute the same
8182       but without saturation.
8183    */
8184    vassert(sat2q && *sat2q == IRTemp_INVALID);
8185    vassert(sat2n && *sat2n == IRTemp_INVALID);
8186    newTempsV128_3(sat1q, sat1n, res);
8187    IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8188                                          mkexpr(vecN), mkexpr(vecM));
8189    IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8190                                          mkexpr(vecN), mkexpr(vecM));
8191    assign(*sat1q, mkexpr(tq));
8192    assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8193 
8194    /* If there is no accumulation, the final result is sat1q,
8195       and there's no assignment to sat2q or sat2n. */
8196    if (mas == 'm') {
8197       assign(*res, mkexpr(*sat1q));
8198       return;
8199    }
8200 
8201    /* Compute
8202          sat2q  = vecD +sq/-sq sat1q
8203          sat2n  = vecD +/-     sat1n
8204          result = sat2q
8205    */
8206    newTempsV128_2(sat2q, sat2n);
8207    assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8208                         mkexpr(vecD), mkexpr(*sat1q)));
8209    assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8210                         mkexpr(vecD), mkexpr(*sat1n)));
8211    assign(*res, mkexpr(*sat2q));
8212 }
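
/* Worked example (illustrative only): for 16-bit lanes, SQDMULL of
   0x8000 by 0x8000 (-32768 * -32768) yields the doubled product
   2 * 0x40000000 = 0x80000000, which does not fit in a signed 32-bit
   lane:

      sat1q lane = 0x7FFFFFFF   (saturated)
      sat1n lane = 0x80000000   (wrapped)

   so sat1q != sat1n and the caller's QCFLAG update fires.  Smaller
   operands, e.g. 0x4000 * 0x4000 doubled = 0x20000000, fit, and the
   two results stay equal. */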
8213 
8214 
8215 /* Generate IR for widening signed vector multiplies.  The operands
8216    have their lane width signedly widened, and they are then multiplied
8217    at the wider width, returning results in two new IRTemps. */
8218 static
8219 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8220                   UInt sizeNarrow, IRTemp argL, IRTemp argR )
8221 {
8222    vassert(sizeNarrow <= 2);
8223    newTempsV128_2(resHI, resLO);
8224    IRTemp argLhi = newTemp(Ity_I64);
8225    IRTemp argLlo = newTemp(Ity_I64);
8226    IRTemp argRhi = newTemp(Ity_I64);
8227    IRTemp argRlo = newTemp(Ity_I64);
8228    assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8229    assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
8230    assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8231    assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
8232    IROp opMulls = mkVecMULLS(sizeNarrow);
8233    assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8234    assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8235 }
8236 
8237 
8238 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8239    double that, possibly add a rounding constant (R variants), and take
8240    the high half. */
8241 static
8242 void math_SQDMULH ( /*OUT*/IRTemp* res,
8243                     /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8244                     Bool isR, UInt size, IRTemp vN, IRTemp vM )
8245 {
8246    vassert(size == X01 || size == X10); /* s or h only */
8247 
8248    newTempsV128_3(res, sat1q, sat1n);
8249 
8250    IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8251    math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8252 
8253    IROp   addWide = mkVecADD(size+1);
8254 
8255    if (isR) {
8256       assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8257 
8258       Int    rcShift    = size == X01 ? 15 : 31;
8259       IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8260       assign(*sat1n,
8261              binop(mkVecCATODDLANES(size),
8262                    binop(addWide,
8263                          binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8264                          mkexpr(roundConst)),
8265                    binop(addWide,
8266                          binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8267                          mkexpr(roundConst))));
8268    } else {
8269       assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8270 
8271       assign(*sat1n,
8272              binop(mkVecCATODDLANES(size),
8273                    binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8274                    binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8275    }
8276 
8277    assign(*res, mkexpr(*sat1q));
8278 }
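
/* Arithmetic sketch (illustrative only): for 16-bit lanes
   (size == X01) each result lane is the high half of the doubled
   widened product, with the R variant adding 2^15 first:

      SQDMULH :  (2*sN*sM)          >> 16
      SQRDMULH:  (2*sN*sM + 0x8000) >> 16

   e.g. sN = 1, sM = 0x4000: 2*1*0x4000 = 0x8000, so SQDMULH gives 0
   while SQRDMULH gives (0x8000 + 0x8000) >> 16 = 1.  The CatOddLanes
   step is what extracts the high halves of the 32-bit intermediates
   when rebuilding the unsaturated result (*sat1n). */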
8279 
8280 
8281 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
8282    a new temp in *res, and the Q difference pair in new temps in
8283    *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
8284    three operations it is. */
8285 static
8286 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8287                      /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8288                      IRTemp src, UInt size, UInt shift, const HChar* nm )
8289 {
8290    vassert(size <= 3);
8291    UInt laneBits = 8 << size;
8292    vassert(shift < laneBits);
8293    newTempsV128_3(res, qDiff1, qDiff2);
8294    IRTemp z128 = newTempV128();
8295    assign(z128, mkV128(0x0000));
8296 
8297    /* UQSHL */
8298    if (vex_streq(nm, "uqshl")) {
8299       IROp qop = mkVecQSHLNSATUU(size);
8300       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8301       if (shift == 0) {
8302          /* No shift means no saturation. */
8303          assign(*qDiff1, mkexpr(z128));
8304          assign(*qDiff2, mkexpr(z128));
8305       } else {
8306          /* Saturation has occurred if any of the shifted-out bits are
8307             nonzero.  We get the shifted-out bits by right-shifting the
8308             original value. */
8309          UInt rshift = laneBits - shift;
8310          vassert(rshift >= 1 && rshift < laneBits);
8311          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8312          assign(*qDiff2, mkexpr(z128));
8313       }
8314       return;
8315    }
8316 
8317    /* SQSHL */
8318    if (vex_streq(nm, "sqshl")) {
8319       IROp qop = mkVecQSHLNSATSS(size);
8320       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8321       if (shift == 0) {
8322          /* No shift means no saturation. */
8323          assign(*qDiff1, mkexpr(z128));
8324          assign(*qDiff2, mkexpr(z128));
8325       } else {
8326          /* Saturation has occurred if any of the shifted-out bits are
8327             different from the top bit of the original value. */
8328          UInt rshift = laneBits - 1 - shift;
8329          vassert(rshift >= 0 && rshift < laneBits-1);
8330          /* qDiff1 is the shifted out bits, and the top bit of the original
8331             value, preceded by zeroes. */
8332          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8333          /* qDiff2 is the top bit of the original value, cloned the
8334             correct number of times. */
8335          assign(*qDiff2, binop(mkVecSHRN(size),
8336                                binop(mkVecSARN(size), mkexpr(src),
8337                                                       mkU8(laneBits-1)),
8338                                mkU8(rshift)));
8339          /* This also succeeds in comparing the top bit of the original
8340             value to itself, which is a bit stupid, but not wrong. */
8341       }
8342       return;
8343    }
8344 
8345    /* SQSHLU */
8346    if (vex_streq(nm, "sqshlu")) {
8347       IROp qop = mkVecQSHLNSATSU(size);
8348       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8349       if (shift == 0) {
8350          /* If there's no shift, saturation depends on the top bit
8351             of the source. */
8352          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8353          assign(*qDiff2, mkexpr(z128));
8354       } else {
8355          /* Saturation has occurred if any of the shifted-out bits are
8356             nonzero.  We get the shifted-out bits by right-shifting the
8357             original value. */
8358          UInt rshift = laneBits - shift;
8359          vassert(rshift >= 1 && rshift < laneBits);
8360          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8361          assign(*qDiff2, mkexpr(z128));
8362       }
8363       return;
8364    }
8365 
8366    vassert(0);
8367 }
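
/* Saturation-test example (illustrative only): take "uqshl" with
   8-bit lanes (laneBits == 8) and shift == 3.  For a source lane 0x25
   the shifted-out bits are 0x25 >> 5 = 0x01, so qDiff1 is nonzero
   while qDiff2 is zero, and the Q comparison reports saturation;
   indeed 0x25 << 3 = 0x128 does not fit in 8 bits.  For 0x15 the
   shifted-out bits are zero, matching qDiff2: 0x15 << 3 = 0xA8 fits
   and no saturation is flagged. */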
8368 
8369 
8370 /* Generate IR to do SRHADD and URHADD. */
8371 static
8372 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8373 {
8374    /* Generate this:
8375       (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8376    */
8377    vassert(size <= 3);
8378    IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8379    IROp opADD = mkVecADD(size);
8380    /* The only tricky bit is to generate the correct vector 1 constant. */
8381    const ULong ones64[4]
8382       = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8383           0x0000000100000001ULL, 0x0000000000000001ULL };
8384    IRTemp imm64 = newTemp(Ity_I64);
8385    assign(imm64, mkU64(ones64[size]));
8386    IRTemp vecOne = newTempV128();
8387    assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8388    IRTemp scaOne = newTemp(Ity_I8);
8389    assign(scaOne, mkU8(1));
8390    IRTemp res = newTempV128();
8391    assign(res,
8392           binop(opADD,
8393                 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8394                 binop(opADD,
8395                       binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8396                       binop(opSHR,
8397                             binop(opADD,
8398                                   binop(opADD,
8399                                         binop(Iop_AndV128, mkexpr(aa),
8400                                                            mkexpr(vecOne)),
8401                                         binop(Iop_AndV128, mkexpr(bb),
8402                                                            mkexpr(vecOne))
8403                                   ),
8404                                   mkexpr(vecOne)
8405                             ),
8406                             mkexpr(scaOne)
8407                       )
8408                 )
8409           )
8410    );
8411    return res;
8412 }
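
/* Worked instance of the identity above (illustrative only): with
   a = 5 and b = 6 in any lane size,

      (5 >> 1) + (6 >> 1) + (((5 & 1) + (6 & 1) + 1) >> 1)
    = 2 + 3 + ((1 + 0 + 1) >> 1)
    = 5 + 1 = 6

   which equals the rounded halving add (5 + 6 + 1) >> 1, computed
   without ever forming the possibly-overflowing full-width sum. */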
8413 
8414 
8415 /* QCFLAG tracks the SIMD sticky saturation status.  Update the status
8416    thusly: if, after application of |opZHI| to both |qres| and |nres|,
8417    they have the same value, leave QCFLAG unchanged.  Otherwise, set it
8418    (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
8419    operators, or Iop_INVALID, in which case |qres| and |nres| are used
8420    unmodified.  The presence of |opZHI| means this function can be used to
8421    generate QCFLAG update code for both scalar and vector SIMD operations.
8422 */
8423 static
8424 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8425 {
8426    IRTemp diff      = newTempV128();
8427    IRTemp oldQCFLAG = newTempV128();
8428    IRTemp newQCFLAG = newTempV128();
8429    if (opZHI == Iop_INVALID) {
8430       assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8431    } else {
8432       vassert(opZHI == Iop_ZeroHI64ofV128
8433               || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8434       assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8435    }
8436    assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8437    assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8438    stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8439 }
8440 
8441 
8442 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8443    are used unmodified, hence suitable for QCFLAG updates for whole-vector
8444    operations. */
8445 static
8446 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8447 {
8448    updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8449 }
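
/* Usage sketch (illustrative only): a whole-vector operation compares
   all 128 bits of the q/n results,

      updateQCFLAGwithDifference(qres, nres);

   while a scalar operation on a D-sized value wants only the low 64
   bits considered, and so passes a zeroing op:

      updateQCFLAGwithDifferenceZHI(qres, nres, Iop_ZeroHI64ofV128);

   Bits cleared by |opZHI| can then never contribute to QCFLAG. */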
8450 
8451 
8452 /* Generate IR to rearrange two vector values in a way which is useful
8453    for doing S/D add-pair etc operations.  There are 3 cases:
8454 
8455    2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]
8456 
8457    4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]
8458 
8459    2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]
8460 
8461    The cases are distinguished as follows:
8462    isD == True,  bitQ == 1  =>  2d
8463    isD == False, bitQ == 1  =>  4s
8464    isD == False, bitQ == 0  =>  2s
8465 */
8466 static
8467 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8468         /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8469         IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8470      )
8471 {
8472    vassert(rearrL && *rearrL == IRTemp_INVALID);
8473    vassert(rearrR && *rearrR == IRTemp_INVALID);
8474    *rearrL = newTempV128();
8475    *rearrR = newTempV128();
8476    if (isD) {
8477       // 2d case
8478       vassert(bitQ == 1);
8479       assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8480       assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8481    }
8482    else if (!isD && bitQ == 1) {
8483       // 4s case
8484       assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
8485       assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8486    } else {
8487       // 2s case
8488       vassert(!isD && bitQ == 0);
8489       IRTemp m1n1m0n0 = newTempV128();
8490       IRTemp m0n0m1n1 = newTempV128();
8491       assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8492                              mkexpr(vecM), mkexpr(vecN)));
8493       assign(m0n0m1n1, triop(Iop_SliceV128,
8494                              mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8495       assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8496       assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8497    }
8498 }
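
/* Usage sketch (illustrative only): for FADDP Vd.4s, Vn.4s, Vm.4s a
   caller takes rearrL = [m3 m1 n3 n1] and rearrR = [m2 m0 n2 n0] and
   performs one lane-wise FP add, so each output lane is the sum of a
   horizontally adjacent input pair, with Vn supplying the low half of
   the result and Vm the high half. */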
8499 
8500 
8501 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8502 static Double two_to_the_minus ( Int n )
8503 {
8504    if (n == 1) return 0.5;
8505    vassert(n >= 2 && n <= 64);
8506    Int half = n / 2;
8507    return two_to_the_minus(half) * two_to_the_minus(n - half);
8508 }
8509 
8510 
8511 /* Returns 2.0 ^ n for n in 1 .. 64 */
8512 static Double two_to_the_plus ( Int n )
8513 {
8514    if (n == 1) return 2.0;
8515    vassert(n >= 2 && n <= 64);
8516    Int half = n / 2;
8517    return two_to_the_plus(half) * two_to_the_plus(n - half);
8518 }
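
/* Note (an editorial observation, not original commentary): both
   helpers recurse on roughly n/2 and every intermediate value is a
   power of two, which doubles represent exactly throughout this
   range, so no rounding occurs at any step and no libm dependency is
   needed.  For example two_to_the_minus(5)
   = two_to_the_minus(2) * two_to_the_minus(3)
   = 0.25 * 0.125 = 0.03125 = 2^-5 exactly. */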
8519 
8520 
8521 /*------------------------------------------------------------*/
8522 /*--- SIMD and FP instructions                             ---*/
8523 /*------------------------------------------------------------*/
8524 
8525 static
8526 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8527 {
8528    /* 31  29     23  21 20 15 14   10 9 4
8529       0 q 101110 op2 0  m  0  imm4 0  n d
8530       Decode fields: op2
8531    */
8532 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8533    if (INSN(31,31) != 0
8534        || INSN(29,24) != BITS6(1,0,1,1,1,0)
8535        || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8536       return False;
8537    }
8538    UInt bitQ = INSN(30,30);
8539    UInt op2  = INSN(23,22);
8540    UInt mm   = INSN(20,16);
8541    UInt imm4 = INSN(14,11);
8542    UInt nn   = INSN(9,5);
8543    UInt dd   = INSN(4,0);
8544 
8545    if (op2 == BITS2(0,0)) {
8546       /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
8547       IRTemp sHi = newTempV128();
8548       IRTemp sLo = newTempV128();
8549       IRTemp res = newTempV128();
8550       assign(sHi, getQReg128(mm));
8551       assign(sLo, getQReg128(nn));
8552       if (bitQ == 1) {
8553          if (imm4 == 0) {
8554             assign(res, mkexpr(sLo));
8555          } else {
8556             vassert(imm4 >= 1 && imm4 <= 15);
8557             assign(res, triop(Iop_SliceV128,
8558                               mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8559          }
8560          putQReg128(dd, mkexpr(res));
8561          DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8562       } else {
8563          if (imm4 >= 8) return False;
8564          if (imm4 == 0) {
8565             assign(res, mkexpr(sLo));
8566          } else {
8567             vassert(imm4 >= 1 && imm4 <= 7);
8568             IRTemp hi64lo64 = newTempV128();
8569             assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8570                                    mkexpr(sHi), mkexpr(sLo)));
8571             assign(res, triop(Iop_SliceV128,
8572                               mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8573          }
8574          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8575          DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8576       }
8577       return True;
8578    }
8579 
8580    return False;
8581 #  undef INSN
8582 }
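
/* Decode example (illustrative only): EXT Vd.16b, Vn.16b, Vm.16b, #3
   treats Vm:Vn as a 32-byte value (Vn in bytes 0..15) and extracts
   bytes 3..18, i.e. result bytes 0..12 are Vn bytes 3..15 and result
   bytes 13..15 are Vm bytes 0..2.  Iop_SliceV128 with amount 3 over
   (sHi:sLo) expresses exactly that byte-wise slice. */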
8583 
8584 
8585 static
8586 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8587 {
8588    /* 31  29     23  21 20 15 14  12 11 9 4
8589       0 q 001110 op2 0  m  0  len op 00 n d
8590       Decode fields: op2,len,op
8591    */
8592 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8593    if (INSN(31,31) != 0
8594        || INSN(29,24) != BITS6(0,0,1,1,1,0)
8595        || INSN(21,21) != 0
8596        || INSN(15,15) != 0
8597        || INSN(11,10) != BITS2(0,0)) {
8598       return False;
8599    }
8600    UInt bitQ  = INSN(30,30);
8601    UInt op2   = INSN(23,22);
8602    UInt mm    = INSN(20,16);
8603    UInt len   = INSN(14,13);
8604    UInt bitOP = INSN(12,12);
8605    UInt nn    = INSN(9,5);
8606    UInt dd    = INSN(4,0);
8607 
8608    if (op2 == X00) {
8609       /* -------- 00,xx,0 TBL, xx register table -------- */
8610       /* -------- 00,xx,1 TBX, xx register table -------- */
8611       /* 31  28        20 15 14  12  9 4
8612          0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8613          0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8614          where Ta = 16b(q=1) or 8b(q=0)
8615       */
8616       Bool isTBX = bitOP == 1;
8617       /* The out-of-range values to use. */
8618       IRTemp oor_values = newTempV128();
8619       assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8620       /* src value */
8621       IRTemp src = newTempV128();
8622       assign(src, getQReg128(mm));
8623       /* The table values */
8624       IRTemp tab[4];
8625       UInt   i;
8626       for (i = 0; i <= len; i++) {
8627          vassert(i < 4);
8628          tab[i] = newTempV128();
8629          assign(tab[i], getQReg128((nn + i) % 32));
8630       }
8631       IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8632       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8633       const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8634       const HChar* nm = isTBX ? "tbx" : "tbl";
8635       DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8636           nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8637       return True;
8638    }
8639 
8641    return False;
8642 #  undef INSN
8643 }
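
/* Behavioural example (illustrative only): with len == 1 the table is
   the 32-byte pair {Vn, V(n+1)%32}, so index bytes 0..31 in Vm select
   a table byte and indices 32..255 are out of range.  TBL maps an
   out-of-range index to 0x00, whereas TBX leaves the corresponding
   byte of Vd unchanged -- hence |oor_values| is preloaded from
   getQReg128(dd) in the TBX case above. */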
8644 
8645 
8646 static
8647 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8648 {
8649    /* 31  29     23   21 20 15 14     11 9 4
8650       0 q 001110 size 0  m  0  opcode 10 n d
8651       Decode fields: opcode
8652    */
8653 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8654    if (INSN(31,31) != 0
8655        || INSN(29,24) != BITS6(0,0,1,1,1,0)
8656        || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8657       return False;
8658    }
8659    UInt bitQ   = INSN(30,30);
8660    UInt size   = INSN(23,22);
8661    UInt mm     = INSN(20,16);
8662    UInt opcode = INSN(14,12);
8663    UInt nn     = INSN(9,5);
8664    UInt dd     = INSN(4,0);
8665 
8666    if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8667       /* -------- 001 UZP1 std7_std7_std7 -------- */
8668       /* -------- 101 UZP2 std7_std7_std7 -------- */
8669       if (bitQ == 0 && size == X11) return False; // implied 1d case
8670       Bool   isUZP1 = opcode == BITS3(0,0,1);
8671       IROp   op     = isUZP1 ? mkVecCATEVENLANES(size)
8672                              : mkVecCATODDLANES(size);
8673       IRTemp preL = newTempV128();
8674       IRTemp preR = newTempV128();
8675       IRTemp res  = newTempV128();
8676       if (bitQ == 0) {
8677          assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8678                                                   getQReg128(nn)));
8679          assign(preR, mkexpr(preL));
8680       } else {
8681          assign(preL, getQReg128(mm));
8682          assign(preR, getQReg128(nn));
8683       }
8684       assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8685       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8686       const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
8687       const HChar* arr = nameArr_Q_SZ(bitQ, size);
8688       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8689           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8690       return True;
8691    }
8692 
8693    if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8694       /* -------- 010 TRN1 std7_std7_std7 -------- */
8695       /* -------- 110 TRN2 std7_std7_std7 -------- */
8696       if (bitQ == 0 && size == X11) return False; // implied 1d case
8697       Bool   isTRN1 = opcode == BITS3(0,1,0);
8698       IROp   op1    = isTRN1 ? mkVecCATEVENLANES(size)
8699                              : mkVecCATODDLANES(size);
8700       IROp op2 = mkVecINTERLEAVEHI(size);
8701       IRTemp srcM = newTempV128();
8702       IRTemp srcN = newTempV128();
8703       IRTemp res  = newTempV128();
8704       assign(srcM, getQReg128(mm));
8705       assign(srcN, getQReg128(nn));
8706       assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8707                              binop(op1, mkexpr(srcN), mkexpr(srcN))));
8708       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8709       const HChar* nm  = isTRN1 ? "trn1" : "trn2";
8710       const HChar* arr = nameArr_Q_SZ(bitQ, size);
8711       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8712           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8713       return True;
8714    }
8715 
8716    if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8717       /* -------- 011 ZIP1 std7_std7_std7 -------- */
8718       /* -------- 111 ZIP2 std7_std7_std7 -------- */
8719       if (bitQ == 0 && size == X11) return False; // implied 1d case
8720       Bool   isZIP1 = opcode == BITS3(0,1,1);
8721       IROp   op     = isZIP1 ? mkVecINTERLEAVELO(size)
8722                              : mkVecINTERLEAVEHI(size);
8723       IRTemp preL = newTempV128();
8724       IRTemp preR = newTempV128();
8725       IRTemp res  = newTempV128();
8726       if (bitQ == 0 && !isZIP1) {
8727          IRTemp z128 = newTempV128();
8728          assign(z128, mkV128(0x0000));
8729          // preL = Vm shifted left 32 bits
8730          // preR = Vn shifted left 32 bits
8731          assign(preL, triop(Iop_SliceV128,
8732                             getQReg128(mm), mkexpr(z128), mkU8(12)));
8733          assign(preR, triop(Iop_SliceV128,
8734                             getQReg128(nn), mkexpr(z128), mkU8(12)));
8735 
8736       } else {
8737          assign(preL, getQReg128(mm));
8738          assign(preR, getQReg128(nn));
8739       }
8740       assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8741       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8742       const HChar* nm  = isZIP1 ? "zip1" : "zip2";
8743       const HChar* arr = nameArr_Q_SZ(bitQ, size);
8744       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8745           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8746       return True;
8747    }
8748 
8749    return False;
8750 #  undef INSN
8751 }
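
/* Example of the TRN construction (illustrative only): for TRN1 Vd.8h
   (size == X01), CatEvenLanes16x8(srcM, srcM) yields the even lanes
   of Vm duplicated into both halves, likewise for Vn, and
   InterleaveHI16x8 of those two values produces

      [ m6 n6 m4 n4 m2 n2 m0 n0 ]

   (highest lane first), which is exactly TRN1's alternation of even
   lanes; TRN2 gets the odd lanes via CatOddLanes instead. */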
8752 
8753 
8754 static
8755 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8756 {
8757    /* 31    28    23   21    16     11 9 4
8758       0 q u 01110 size 11000 opcode 10 n d
8759       Decode fields: u,size,opcode
8760    */
8761 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8762    if (INSN(31,31) != 0
8763        || INSN(28,24) != BITS5(0,1,1,1,0)
8764        || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8765       return False;
8766    }
8767    UInt bitQ   = INSN(30,30);
8768    UInt bitU   = INSN(29,29);
8769    UInt size   = INSN(23,22);
8770    UInt opcode = INSN(16,12);
8771    UInt nn     = INSN(9,5);
8772    UInt dd     = INSN(4,0);
8773 
8774    if (opcode == BITS5(0,0,0,1,1)) {
8775       /* -------- 0,xx,00011 SADDLV -------- */
8776       /* -------- 1,xx,00011 UADDLV -------- */
8777       /* size is the narrow size */
8778       if (size == X11 || (size == X10 && bitQ == 0)) return False;
8779       Bool   isU = bitU == 1;
8780       IRTemp src = newTempV128();
8781       assign(src, getQReg128(nn));
8782       /* The basic plan is to widen the lower half, and if Q = 1,
8783          the upper half too.  Add them together (if Q = 1), and in
8784          either case fold with add at twice the lane width.
8785       */
8786       IRExpr* widened
8787          = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8788                      isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8789       if (bitQ == 1) {
8790          widened
8791             = binop(mkVecADD(size+1),
8792                     widened,
8793                     mkexpr(math_WIDEN_LO_OR_HI_LANES(
8794                               isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8795               );
8796       }
8797       /* Now fold. */
8798       IRTemp tWi = newTempV128();
8799       assign(tWi, widened);
8800       IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8801       putQReg128(dd, mkexpr(res));
8802       const HChar* arr = nameArr_Q_SZ(bitQ, size);
8803       const HChar  ch  = "bhsd"[size];
8804       DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8805           nameQReg128(dd), ch, nameQReg128(nn), arr);
8806       return True;
8807    }
8808 
8809    UInt ix = 0;
8810    /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8811    else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8812    else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8813    /**/
8814    if (ix != 0) {
8815       /* -------- 0,xx,01010: SMAXV -------- (1) */
8816       /* -------- 1,xx,01010: UMAXV -------- (2) */
8817       /* -------- 0,xx,11010: SMINV -------- (3) */
8818       /* -------- 1,xx,11010: UMINV -------- (4) */
8819       /* -------- 0,xx,11011: ADDV  -------- (5) */
8820       vassert(ix >= 1 && ix <= 5);
8821       if (size == X11) return False; // 1d,2d cases not allowed
8822       if (size == X10 && bitQ == 0) return False; // 2s case not allowed
8823       const IROp opMAXS[3]
8824          = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8825       const IROp opMAXU[3]
8826          = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
8827       const IROp opMINS[3]
8828          = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8829       const IROp opMINU[3]
8830          = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8831       const IROp opADD[3]
8832          = { Iop_Add8x16,  Iop_Add16x8,  Iop_Add32x4 };
8833       vassert(size < 3);
8834       IROp op = Iop_INVALID;
8835       const HChar* nm = NULL;
8836       switch (ix) {
8837          case 1: op = opMAXS[size]; nm = "smaxv"; break;
8838          case 2: op = opMAXU[size]; nm = "umaxv"; break;
8839          case 3: op = opMINS[size]; nm = "sminv"; break;
8840          case 4: op = opMINU[size]; nm = "uminv"; break;
8841          case 5: op = opADD[size];  nm = "addv";  break;
8842          default: vassert(0);
8843       }
8844       vassert(op != Iop_INVALID && nm != NULL);
8845       IRTemp tN1 = newTempV128();
8846       assign(tN1, getQReg128(nn));
8847       /* If Q == 0, we're just folding lanes in the lower half of
8848          the value.  In which case, copy the lower half of the
8849          source into the upper half, so we can then treat it the
8850          same as the full width case.  Except for the addition case,
8851          in which we have to zero out the upper half. */
8852       IRTemp tN2 = newTempV128();
8853       assign(tN2, bitQ == 0
8854                      ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8855                                 : mk_CatEvenLanes64x2(tN1,tN1))
8856                      : mkexpr(tN1));
8857       IRTemp res = math_FOLDV(tN2, op);
8858       if (res == IRTemp_INVALID)
8859          return False; /* means math_FOLDV
8860                           doesn't handle this case yet */
8861       putQReg128(dd, mkexpr(res));
8862       const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8863       IRType laneTy = tys[size];
8864       const HChar* arr = nameArr_Q_SZ(bitQ, size);
8865       DIP("%s %s, %s.%s\n", nm,
8866           nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8867       return True;
8868    }
8869 
8870    if ((size == X00 || size == X10)
8871        && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8872       /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8873       /* -------- 0,10,01100: FMINNMV s_4s -------- */
8874       /* -------- 1,00,01111: FMAXV   s_4s -------- */
8875       /* -------- 1,10,01111: FMINV   s_4s -------- */
8876       /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8877       if (bitQ == 0) return False; // Only 4s is allowed
8878       Bool   isMIN = (size & 2) == 2;
8879       Bool   isNM  = opcode == BITS5(0,1,1,0,0);
8880       IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8881       IRTemp src = newTempV128();
8882       assign(src, getQReg128(nn));
8883       IRTemp res = math_FOLDV(src, opMXX);
8884       putQReg128(dd, mkexpr(res));
8885       DIP("%s%sv s%u, %u.4s\n",
8886           isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8887       return True;
8888    }
8889 
8891    return False;
8892 #  undef INSN
8893 }
8894 
8895 
8896 static
8897 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8898 {
8899    /* 31     28       20   15 14   10 9 4
8900       0 q op 01110000 imm5 0  imm4 1  n d
8901       Decode fields: q,op,imm4
8902    */
8903 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
8904    if (INSN(31,31) != 0
8905        || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8906        || INSN(15,15) != 0 || INSN(10,10) != 1) {
8907       return False;
8908    }
8909    UInt bitQ  = INSN(30,30);
8910    UInt bitOP = INSN(29,29);
8911    UInt imm5  = INSN(20,16);
8912    UInt imm4  = INSN(14,11);
8913    UInt nn    = INSN(9,5);
8914    UInt dd    = INSN(4,0);
8915 
8916    /* -------- x,0,0000: DUP (element, vector) -------- */
8917    /* 31  28       20   15     9 4
8918       0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
8919    */
8920    if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8921       UInt   laneNo    = 0;
8922       UInt   laneSzLg2 = 0;
8923       HChar  laneCh    = '?';
8924       IRTemp res       = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8925                                              getQReg128(nn), imm5);
8926       if (res == IRTemp_INVALID)
8927          return False;
8928       if (bitQ == 0 && laneSzLg2 == X11)
8929          return False; /* .1d case */
8930       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8931       const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8932       DIP("dup %s.%s, %s.%c[%u]\n",
8933            nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8934       return True;
8935    }
8936 
8937    /* -------- x,0,0001: DUP (general, vector) -------- */
8938    /* 31  28       20   15       9 4
8939       0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
8940       Q=0 writes 64, Q=1 writes 128
8941       imm5: xxxx1  8b(q=0)      or 16b(q=1),     R=W
8942             xxx10  4H(q=0)      or 8H(q=1),      R=W
8943             xx100  2S(q=0)      or 4S(q=1),      R=W
8944             x1000  Invalid(q=0) or 2D(q=1),      R=X
8945             x0000  Invalid(q=0) or Invalid(q=1)
8946       Require op=0, imm4=0001
8947    */
8948    if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8949       Bool   isQ = bitQ == 1;
8950       IRTemp w0  = newTemp(Ity_I64);
8951       const HChar* arT = "??";
8952       IRType laneTy = Ity_INVALID;
8953       if (imm5 & 1) {
8954          arT    = isQ ? "16b" : "8b";
8955          laneTy = Ity_I8;
8956          assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8957       }
8958       else if (imm5 & 2) {
8959          arT    = isQ ? "8h" : "4h";
8960          laneTy = Ity_I16;
8961          assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8962       }
8963       else if (imm5 & 4) {
8964          arT    = isQ ? "4s" : "2s";
8965          laneTy = Ity_I32;
8966          assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8967       }
8968       else if ((imm5 & 8) && isQ) {
8969          arT    = "2d";
8970          laneTy = Ity_I64;
8971          assign(w0, getIReg64orZR(nn));
8972       }
8973       else {
8974          /* invalid; leave laneTy unchanged. */
8975       }
8976       /* */
8977       if (laneTy != Ity_INVALID) {
8978          IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8979          putQReg128(dd, binop(Iop_64HLtoV128,
8980                               isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8981          DIP("dup %s.%s, %s\n",
8982              nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8983          return True;
8984       }
8985       /* invalid */
8986       return False;
8987    }
8988 
8989    /* -------- 1,0,0011: INS (general) -------- */
8990    /* 31  28       20   15     9 4
8991       010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
8992       where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8993                                  xxx10 -> H, xxx
8994                                  xx100 -> S, xx
8995                                  x1000 -> D, x
8996    */
8997    if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8998       HChar   ts     = '?';
8999       UInt    laneNo = 16;
9000       IRExpr* src    = NULL;
9001       if (imm5 & 1) {
9002          src    = unop(Iop_64to8, getIReg64orZR(nn));
9003          laneNo = (imm5 >> 1) & 15;
9004          ts     = 'b';
9005       }
9006       else if (imm5 & 2) {
9007          src    = unop(Iop_64to16, getIReg64orZR(nn));
9008          laneNo = (imm5 >> 2) & 7;
9009          ts     = 'h';
9010       }
9011       else if (imm5 & 4) {
9012          src    = unop(Iop_64to32, getIReg64orZR(nn));
9013          laneNo = (imm5 >> 3) & 3;
9014          ts     = 's';
9015       }
9016       else if (imm5 & 8) {
9017          src    = getIReg64orZR(nn);
9018          laneNo = (imm5 >> 4) & 1;
9019          ts     = 'd';
9020       }
9021       /* */
9022       if (src) {
9023          vassert(laneNo < 16);
9024          putQRegLane(dd, laneNo, src);
9025          DIP("ins %s.%c[%u], %s\n",
9026              nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9027          return True;
9028       }
9029       /* invalid */
9030       return False;
9031    }
9032 
9033    /* -------- x,0,0101: SMOV -------- */
9034    /* -------- x,0,0111: UMOV -------- */
9035    /* 31  28        20   15     9 4
9036       0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
9037       0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
9038       dest is Xd when q==1, Wd when q==0
9039       UMOV:
9040          Ts,index,ops = case q:imm5 of
9041                           0:xxxx1 -> B, xxxx, 8Uto64
9042                           1:xxxx1 -> invalid
9043                           0:xxx10 -> H, xxx,  16Uto64
9044                           1:xxx10 -> invalid
9045                           0:xx100 -> S, xx,   32Uto64
9046                           1:xx100 -> invalid
9047                           1:x1000 -> D, x,    copy64
9048                           other   -> invalid
9049       SMOV:
9050          Ts,index,ops = case q:imm5 of
9051                           0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9052                           1:xxxx1 -> B, xxxx, 8Sto64
9053                           0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
9054                           1:xxx10 -> H, xxx,  16Sto64
9055                           0:xx100 -> invalid
9056                           1:xx100 -> S, xx,   32Sto64
9057                           1:x1000 -> invalid
9058                           other   -> invalid
9059    */
9060    if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9061       Bool isU  = (imm4 & 2) == 2;
9062       const HChar* arTs = "??";
9063       UInt    laneNo = 16; /* invalid */
9064       // Setting 'res' to non-NULL determines valid/invalid
9065       IRExpr* res    = NULL;
9066       if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9067          laneNo = (imm5 >> 1) & 15;
9068          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9069          res = isU ? unop(Iop_8Uto64, lane)
9070                    : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9071          arTs = "b";
9072       }
9073       else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9074          laneNo = (imm5 >> 1) & 15;
9075          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9076          res = isU ? NULL
9077                    : unop(Iop_8Sto64, lane);
9078          arTs = "b";
9079       }
9080       else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9081          laneNo = (imm5 >> 2) & 7;
9082          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9083          res = isU ? unop(Iop_16Uto64, lane)
9084                    : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9085          arTs = "h";
9086       }
9087       else if (bitQ && (imm5 & 2)) { // 1:xxx10
9088          laneNo = (imm5 >> 2) & 7;
9089          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9090          res = isU ? NULL
9091                    : unop(Iop_16Sto64, lane);
9092          arTs = "h";
9093       }
9094       else if (!bitQ && (imm5 & 4)) { // 0:xx100
9095          laneNo = (imm5 >> 3) & 3;
9096          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9097          res = isU ? unop(Iop_32Uto64, lane)
9098                    : NULL;
9099          arTs = "s";
9100       }
9101       else if (bitQ && (imm5 & 4)) { // 1:xxx10
9102          laneNo = (imm5 >> 3) & 3;
9103          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9104          res = isU ? NULL
9105                    : unop(Iop_32Sto64, lane);
9106          arTs = "s";
9107       }
9108       else if (bitQ && (imm5 & 8)) { // 1:x1000
9109          laneNo = (imm5 >> 4) & 1;
9110          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9111          res = isU ? lane
9112                    : NULL;
9113          arTs = "d";
9114       }
9115       /* */
9116       if (res) {
9117          vassert(laneNo < 16);
9118          putIReg64orZR(dd, res);
9119          DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9120              nameIRegOrZR(bitQ == 1, dd),
9121              nameQReg128(nn), arTs, laneNo);
9122          return True;
9123       }
9124       /* invalid */
9125       return False;
9126    }
9127 
9128    /* -------- 1,1,xxxx: INS (element) -------- */
9129    /* 31  28       20     14   9 4
9130       011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
9131       where Ts,ix1,ix2
9132                = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9133                               xxx10 -> H, xxx,  imm4[3:1]
9134                               xx100 -> S, xx,   imm4[3:2]
9135                               x1000 -> D, x,    imm4[3:3]
9136    */
9137    if (bitQ == 1 && bitOP == 1) {
9138       HChar   ts  = '?';
9139       IRType  ity = Ity_INVALID;
9140       UInt    ix1 = 16;
9141       UInt    ix2 = 16;
9142       if (imm5 & 1) {
9143          ts  = 'b';
9144          ity = Ity_I8;
9145          ix1 = (imm5 >> 1) & 15;
9146          ix2 = (imm4 >> 0) & 15;
9147       }
9148       else if (imm5 & 2) {
9149          ts  = 'h';
9150          ity = Ity_I16;
9151          ix1 = (imm5 >> 2) & 7;
9152          ix2 = (imm4 >> 1) & 7;
9153       }
9154       else if (imm5 & 4) {
9155          ts  = 's';
9156          ity = Ity_I32;
9157          ix1 = (imm5 >> 3) & 3;
9158          ix2 = (imm4 >> 2) & 3;
9159       }
9160       else if (imm5 & 8) {
9161          ts  = 'd';
9162          ity = Ity_I64;
9163          ix1 = (imm5 >> 4) & 1;
9164          ix2 = (imm4 >> 3) & 1;
9165       }
9166       /* */
9167       if (ity != Ity_INVALID) {
9168          vassert(ix1 < 16);
9169          vassert(ix2 < 16);
9170          putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9171          DIP("ins %s.%c[%u], %s.%c[%u]\n",
9172              nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9173          return True;
9174       }
9175       /* invalid */
9176       return False;
9177    }
9178 
9179    return False;
9180 #  undef INSN
9181 }
9182 
9183 
9184 static
9185 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9186 {
9187    /* 31    28          18  15    11 9     4
9188       0q op 01111 00000 abc cmode 01 defgh d
9189       Decode fields: q,op,cmode
9190       Bit 11 is really "o2", but it is always zero.
9191    */
9192 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9193    if (INSN(31,31) != 0
9194        || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9195        || INSN(11,10) != BITS2(0,1)) {
9196       return False;
9197    }
9198    UInt bitQ     = INSN(30,30);
9199    UInt bitOP    = INSN(29,29);
9200    UInt cmode    = INSN(15,12);
9201    UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9202    UInt dd       = INSN(4,0);
9203 
9204    ULong imm64lo  = 0;
9205    UInt  op_cmode = (bitOP << 4) | cmode;
9206    Bool  ok       = False;
9207    Bool  isORR    = False;
9208    Bool  isBIC    = False;
9209    Bool  isMOV    = False;
9210    Bool  isMVN    = False;
9211    Bool  isFMOV   = False;
9212    switch (op_cmode) {
9213       /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9214       /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9215       /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9216       /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9217       case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9218       case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9219          ok = True; isMOV = True; break;
9220 
9221       /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9222       /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9223       /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9224       /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9225       case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9226       case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9227          ok = True; isORR = True; break;
9228 
9229       /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9230       /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9231       case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9232          ok = True; isMOV = True; break;
9233 
9234       /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9235       /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9236       case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9237          ok = True; isORR = True; break;
9238 
9239       /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9240       /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9241       case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9242          ok = True; isMOV = True; break;
9243 
9244       /* -------- x,0,1110 MOVI 8-bit -------- */
9245       case BITS5(0,1,1,1,0):
9246          ok = True; isMOV = True; break;
9247 
9248       /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9249       case BITS5(0,1,1,1,1): // 0:1111
9250          ok = True; isFMOV = True; break;
9251 
9252       /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9253       /* -------- x,1,0010 MVNI 32-bit shifted imm  -------- */
9254       /* -------- x,1,0100 MVNI 32-bit shifted imm  -------- */
9255       /* -------- x,1,0110 MVNI 32-bit shifted imm  -------- */
9256       case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9257       case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9258          ok = True; isMVN = True; break;
9259 
9260       /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9261       /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9262       /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9263       /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9264       case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9265       case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9266          ok = True; isBIC = True; break;
9267 
9268       /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9269       /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9270       case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9271          ok = True; isMVN = True; break;
9272 
9273       /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9274       /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9275       case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9276          ok = True; isBIC = True; break;
9277 
9278       /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9279       /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9280       case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9281          ok = True; isMVN = True; break;
9282 
9283       /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9284       /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9285       case BITS5(1,1,1,1,0):
9286          ok = True; isMOV = True; break;
9287 
9288       /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9289       case BITS5(1,1,1,1,1): // 1:1111
9290          ok = bitQ == 1; isFMOV = True; break;
9291 
9292       default:
9293         break;
9294    }
9295    if (ok) {
9296       vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9297                    + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9298       ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
9299    }
9300    if (ok) {
9301       if (isORR || isBIC) {
9302          ULong inv
9303             = isORR ? 0ULL : ~0ULL;
9304          IRExpr* immV128
9305             = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9306          IRExpr* res
9307             = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9308          const HChar* nm = isORR ? "orr" : "bic";
9309          if (bitQ == 0) {
9310             putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9311             DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
9312          } else {
9313             putQReg128(dd, res);
9314             DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9315                 nameQReg128(dd), imm64lo, imm64lo);
9316          }
9317       }
9318       else if (isMOV || isMVN || isFMOV) {
9319          if (isMVN) imm64lo = ~imm64lo;
9320          ULong   imm64hi = bitQ == 0  ? 0  :  imm64lo;
9321          IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9322                                                  mkU64(imm64lo));
9323          putQReg128(dd, immV128);
9324          DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9325       }
9326       return True;
9327    }
9328    /* else fall through */
9329 
9330    return False;
9331 #  undef INSN
9332 }
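
/* Expansion example (illustrative only): MOVI Vd.4s, #0x5A, LSL #8
   has op:cmode == 0:0010 and abcdefgh == 0x5A, so AdvSIMDExpandImm
   yields the 32-bit pattern 0x00005A00 and hence
   imm64lo == 0x00005A0000005A00; with bitQ == 1 the same value also
   forms the upper 64 bits, filling all four S lanes. */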
9333 
9334 
9335 static
9336 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9337 {
9338    /* 31    28       20   15 14   10 9 4
9339       01 op 11110000 imm5 0  imm4 1  n d
9340       Decode fields: op,imm4
9341    */
9342 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9343    if (INSN(31,30) != BITS2(0,1)
9344        || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9345        || INSN(15,15) != 0 || INSN(10,10) != 1) {
9346       return False;
9347    }
9348    UInt bitOP = INSN(29,29);
9349    UInt imm5  = INSN(20,16);
9350    UInt imm4  = INSN(14,11);
9351    UInt nn    = INSN(9,5);
9352    UInt dd    = INSN(4,0);
9353 
9354    if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9355       /* -------- 0,0000 DUP (element, scalar) -------- */
9356       IRTemp w0     = newTemp(Ity_I64);
9357       const HChar* arTs = "??";
9358       IRType laneTy = Ity_INVALID;
9359       UInt   laneNo = 16; /* invalid */
9360       if (imm5 & 1) {
9361          arTs   = "b";
9362          laneNo = (imm5 >> 1) & 15;
9363          laneTy = Ity_I8;
9364          assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9365       }
9366       else if (imm5 & 2) {
9367          arTs   = "h";
9368          laneNo = (imm5 >> 2) & 7;
9369          laneTy = Ity_I16;
9370          assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9371       }
9372       else if (imm5 & 4) {
9373          arTs   = "s";
9374          laneNo = (imm5 >> 3) & 3;
9375          laneTy = Ity_I32;
9376          assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9377       }
9378       else if (imm5 & 8) {
9379          arTs   = "d";
9380          laneNo = (imm5 >> 4) & 1;
9381          laneTy = Ity_I64;
9382          assign(w0, getQRegLane(nn, laneNo, laneTy));
9383       }
9384       else {
9385          /* invalid; leave laneTy unchanged. */
9386       }
9387       /* */
9388       if (laneTy != Ity_INVALID) {
9389          vassert(laneNo < 16);
9390          putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9391          DIP("dup %s, %s.%s[%u]\n",
9392              nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9393          return True;
9394       }
9395       /* else fall through */
9396    }
9397 
9398    return False;
9399 #  undef INSN
9400 }
9401 
9402 
9403 static
9404 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9405 {
9406    /* 31   28    23 21    16     11 9 4
9407       01 u 11110 sz 11000 opcode 10 n d
9408       Decode fields: u,sz,opcode
9409    */
9410 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9411    if (INSN(31,30) != BITS2(0,1)
9412        || INSN(28,24) != BITS5(1,1,1,1,0)
9413        || INSN(21,17) != BITS5(1,1,0,0,0)
9414        || INSN(11,10) != BITS2(1,0)) {
9415       return False;
9416    }
9417    UInt bitU   = INSN(29,29);
9418    UInt sz     = INSN(23,22);
9419    UInt opcode = INSN(16,12);
9420    UInt nn     = INSN(9,5);
9421    UInt dd     = INSN(4,0);
9422 
9423    if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9424       /* -------- 0,11,11011 ADDP d_2d -------- */
9425       IRTemp xy = newTempV128();
9426       IRTemp xx = newTempV128();
9427       assign(xy, getQReg128(nn));
9428       assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9429       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9430                           binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9431       DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9432       return True;
9433    }
9434 
9435    if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9436       /* -------- 1,00,01101 FADDP s_2s -------- */
9437       /* -------- 1,01,01101 FADDP d_2d -------- */
9438       Bool   isD   = sz == X01;
9439       IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9440       IROp   opADD = mkVecADDF(isD ? 3 : 2);
9441       IRTemp src   = newTempV128();
9442       IRTemp argL  = newTempV128();
9443       IRTemp argR  = newTempV128();
9444       assign(src, getQReg128(nn));
9445       assign(argL, unop(opZHI, mkexpr(src)));
9446       assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9447                                                     mkU8(isD ? 8 : 4))));
9448       putQReg128(dd, unop(opZHI,
9449                           triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9450                                               mkexpr(argL), mkexpr(argR))));
9451       DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9452       return True;
9453    }
9454 
9455    if (bitU == 1
9456        && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9457       /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9458       /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9459       /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
9460       /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
9461       /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9462       Bool   isD   = (sz & 1) == 1;
9463       Bool   isMIN = (sz & 2) == 2;
9464       Bool   isNM  = opcode == BITS5(0,1,1,0,0);
9465       IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9466       IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9467       IRTemp src   = newTempV128();
9468       IRTemp argL  = newTempV128();
9469       IRTemp argR  = newTempV128();
9470       assign(src, getQReg128(nn));
9471       assign(argL, unop(opZHI, mkexpr(src)));
9472       assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9473                                                     mkU8(isD ? 8 : 4))));
9474       putQReg128(dd, unop(opZHI,
9475                           binop(opMXX, mkexpr(argL), mkexpr(argR))));
9476       HChar c = isD ? 'd' : 's';
9477       DIP("%s%sp %c%u, v%u.2%c\n",
9478            isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9479       return True;
9480    }
9481 
9482    return False;
9483 #  undef INSN
9484 }
9485 
9486 
9487 static
9488 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9489 {
9490    /* 31   28     22   18   15     10 9 4
9491       01 u 111110 immh immb opcode 1  n d
9492       Decode fields: u,immh,opcode
9493    */
9494 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9495    if (INSN(31,30) != BITS2(0,1)
9496        || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9497       return False;
9498    }
9499    UInt bitU   = INSN(29,29);
9500    UInt immh   = INSN(22,19);
9501    UInt immb   = INSN(18,16);
9502    UInt opcode = INSN(15,11);
9503    UInt nn     = INSN(9,5);
9504    UInt dd     = INSN(4,0);
9505    UInt immhb  = (immh << 3) | immb;
9506 
9507    if ((immh & 8) == 8
9508        && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9509       /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9510       /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9511       /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9512       /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9513       Bool isU   = bitU == 1;
9514       Bool isAcc = opcode == BITS5(0,0,0,1,0);
9515       UInt sh    = 128 - immhb;
9516       vassert(sh >= 1 && sh <= 64);
9517       IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9518       IRExpr* src = getQReg128(nn);
9519       IRTemp  shf = newTempV128();
9520       IRTemp  res = newTempV128();
9521       if (sh == 64 && isU) {
9522          assign(shf, mkV128(0x0000));
9523       } else {
9524          UInt nudge = 0;
9525          if (sh == 64) {
9526             vassert(!isU);
9527             nudge = 1;
9528          }
9529          assign(shf, binop(op, src, mkU8(sh - nudge)));
9530       }
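      /* Why the nudge: the IR shift ops only accept amounts 0..63.
         USHR #64 is handled above by yielding zero directly, and
         SSHR #64 produces the same value as SSHR #63 (every bit a
         copy of the sign bit), so shifting by 63 instead is safe. */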
9531       assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9532                         : mkexpr(shf));
9533       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9534       const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9535                               : (isU ? "ushr" : "sshr");
9536       DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9537       return True;
9538    }
9539 
9540    if ((immh & 8) == 8
9541        && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9542       /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9543       /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9544       /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9545       /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9546       Bool isU   = bitU == 1;
9547       Bool isAcc = opcode == BITS5(0,0,1,1,0);
9548       UInt sh    = 128 - immhb;
9549       vassert(sh >= 1 && sh <= 64);
9550       IROp    op  = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9552       IRExpr* src  = getQReg128(nn);
9553       IRTemp  imm8 = newTemp(Ity_I8);
9554       assign(imm8, mkU8((UChar)(-sh)));
9555       IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
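      /* The Rsh ops, as used here, take a per-lane *signed* shift
         amount, with a negative value meaning a rounding shift
         rightwards; hence -sh is duplicated into every lane of the
         amount vector. */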
9556       IRTemp  shf  = newTempV128();
9557       IRTemp  res  = newTempV128();
9558       assign(shf, binop(op, src, amt));
9559       assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9560                         : mkexpr(shf));
9561       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9562       const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9563                               : (isU ? "urshr" : "srshr");
9564       DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9565       return True;
9566    }
9567 
9568    if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9569       /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9570       UInt sh = 128 - immhb;
9571       vassert(sh >= 1 && sh <= 64);
9572       if (sh == 64) {
9573          putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9574       } else {
9575          /* sh is in range 1 .. 63 */
9576          ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
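         /* Worked example: sh == 8 gives
            0x8000000000000000 >>s 7 == 0xFF00000000000000, i.e. the
            top 8 bits -- precisely the bits of dd that SRI must
            preserve. */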
9577          IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9578          IRTemp  res    = newTempV128();
9579          assign(res, binop(Iop_OrV128,
9580                            binop(Iop_AndV128, getQReg128(dd), nmaskV),
9581                            binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9582          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9583       }
9584       DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9585       return True;
9586    }
9587 
9588    if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9589       /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9590       UInt sh = immhb - 64;
9591       vassert(sh >= 0 && sh < 64);
9592       putQReg128(dd,
9593                  unop(Iop_ZeroHI64ofV128,
9594                       sh == 0 ? getQReg128(nn)
9595                               : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9596       DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9597       return True;
9598    }
9599 
9600    if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9601       /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9602       UInt sh = immhb - 64;
9603       vassert(sh >= 0 && sh < 64);
9604       if (sh == 0) {
9605          putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9606       } else {
9607          /* sh is in range 1 .. 63 */
9608          ULong   nmask  = (1ULL << sh) - 1;
9609          IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9610          IRTemp  res    = newTempV128();
9611          assign(res, binop(Iop_OrV128,
9612                            binop(Iop_AndV128, getQReg128(dd), nmaskV),
9613                            binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9614          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9615       }
9616       DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9617       return True;
9618    }
9619 
9620    if (opcode == BITS5(0,1,1,1,0)
9621        || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9622       /* -------- 0,01110  SQSHL  #imm -------- */
9623       /* -------- 1,01110  UQSHL  #imm -------- */
9624       /* -------- 1,01100  SQSHLU #imm -------- */
9625       UInt size  = 0;
9626       UInt shift = 0;
9627       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9628       if (!ok) return False;
9629       vassert(size >= 0 && size <= 3);
9630       /* The shift encoding has opposite sign for the leftwards case.
9631          Adjust shift to compensate. */
9632       UInt lanebits = 8 << size;
9633       shift = lanebits - shift;
9634       vassert(shift >= 0 && shift < lanebits);
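      /* Worked example (assuming getLaneInfo_IMMH_IMMB returns the
         rightward-style amount 2*lanebits - immhb): SQSHL #3 on .s
         lanes has immh:immb = 0100:011, so immhb = 35, the helper
         returns 64-35 = 29, and the adjustment gives 32-29 = 3. */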
9635       const HChar* nm = NULL;
9636       /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9637       else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9638       else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9639       else vassert(0);
9640       IRTemp qDiff1 = IRTemp_INVALID;
9641       IRTemp qDiff2 = IRTemp_INVALID;
9642       IRTemp res = IRTemp_INVALID;
9643       IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9644       /* This relies on the fact that the zeroed out lanes generate zeroed
9645          result lanes and don't saturate, so there's no point in trimming
9646          the resulting res, qDiff1 or qDiff2 values. */
9647       math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9648       putQReg128(dd, mkexpr(res));
9649       updateQCFLAGwithDifference(qDiff1, qDiff2);
9650       const HChar arr = "bhsd"[size];
9651       DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9652       return True;
9653    }
9654 
9655    if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9656        || (bitU == 1
9657            && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9658       /* -------- 0,10010   SQSHRN #imm -------- */
9659       /* -------- 1,10010   UQSHRN #imm -------- */
9660       /* -------- 0,10011  SQRSHRN #imm -------- */
9661       /* -------- 1,10011  UQRSHRN #imm -------- */
9662       /* -------- 1,10000  SQSHRUN #imm -------- */
9663       /* -------- 1,10001 SQRSHRUN #imm -------- */
9664       UInt size  = 0;
9665       UInt shift = 0;
9666       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9667       if (!ok || size == X11) return False;
9668       vassert(size >= X00 && size <= X10);
9669       vassert(shift >= 1 && shift <= (8 << size));
9670       const HChar* nm = "??";
9671       IROp op = Iop_INVALID;
9672       /* Decide on the name and the operation. */
9673       /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9674          nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9675       }
9676       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9677          nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9678       }
9679       else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9680          nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9681       }
9682       else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9683          nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9684       }
9685       else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9686          nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9687       }
9688       else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9689          nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9690       }
9691       else vassert(0);
9692       /* Compute the result (Q, shifted value) pair. */
9693       IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9694       IRTemp pair   = newTempV128();
9695       assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
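      /* Result layout (an assumption matching the uses below): the
         lower 64 bits of 'pair' hold the narrowed lanes, and the upper
         64 bits are nonzero iff some lane saturated.  InterleaveHI64x2
         below replicates those upper bits so they can be compared
         against zero to update QC. */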
9696       /* Update the result reg */
9697       IRTemp res64in128 = newTempV128();
9698       assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9699       putQReg128(dd, mkexpr(res64in128));
9700       /* Update the Q flag. */
9701       IRTemp q64q64 = newTempV128();
9702       assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9703       IRTemp z128 = newTempV128();
9704       assign(z128, mkV128(0x0000));
9705       updateQCFLAGwithDifference(q64q64, z128);
9706       /* */
9707       const HChar arrNarrow = "bhsd"[size];
9708       const HChar arrWide   = "bhsd"[size+1];
9709       DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9710       return True;
9711    }
9712 
9713    if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9714       /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9715       /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9716       UInt size  = 0;
9717       UInt fbits = 0;
9718       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9719       /* The following holds because immh is never zero. */
9720       vassert(ok);
9721       /* The following holds because immh >= 0100. */
9722       vassert(size == X10 || size == X11);
9723       Bool isD = size == X11;
9724       Bool isU = bitU == 1;
9725       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9726       Double  scale  = two_to_the_minus(fbits);
9727       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9728                            : IRExpr_Const(IRConst_F32( (Float)scale ));
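      /* Fixed-point semantics: treat the integer as having 'fbits'
         fraction bits, i.e. result = (FP)x * 2^-fbits.  E.g. fbits == 8
         and x == 384 (0x180) converts to 384/256 = 1.5. */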
9729       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
9730       IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9731                            : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9732       IRType tyF = isD ? Ity_F64 : Ity_F32;
9733       IRType tyI = isD ? Ity_I64 : Ity_I32;
9734       IRTemp src = newTemp(tyI);
9735       IRTemp res = newTemp(tyF);
9736       IRTemp rm  = mk_get_IR_rounding_mode();
9737       assign(src, getQRegLane(nn, 0, tyI));
9738       assign(res, triop(opMUL, mkexpr(rm),
9739                                binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9740       putQRegLane(dd, 0, mkexpr(res));
9741       if (!isD) {
9742          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9743       }
9744       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
9745       const HChar ch = isD ? 'd' : 's';
9746       DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9747           ch, dd, ch, nn, fbits);
9748       return True;
9749    }
9750 
9751    if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9752       /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9753       /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9754       UInt size  = 0;
9755       UInt fbits = 0;
9756       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9757       /* The following holds because immh is never zero. */
9758       vassert(ok);
9759       /* The following holds because immh >= 0100. */
9760       vassert(size == X10 || size == X11);
9761       Bool isD = size == X11;
9762       Bool isU = bitU == 1;
9763       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9764       Double  scale  = two_to_the_plus(fbits);
9765       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9766                            : IRExpr_Const(IRConst_F32( (Float)scale ));
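      /* Inverse of the SCVTF case above: result = trunc(x * 2^fbits).
         E.g. fbits == 8 and x == 1.5 yields 1.5 * 256 = 384. */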
9767       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
9768       IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9769                            : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9770       IRType tyF = isD ? Ity_F64 : Ity_F32;
9771       IRType tyI = isD ? Ity_I64 : Ity_I32;
9772       IRTemp src = newTemp(tyF);
9773       IRTemp res = newTemp(tyI);
9774       IRTemp rm  = newTemp(Ity_I32);
9775       assign(src, getQRegLane(nn, 0, tyF));
9776       assign(rm,  mkU32(Irrm_ZERO));
9777       assign(res, binop(opCVT, mkexpr(rm),
9778                                triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9779       putQRegLane(dd, 0, mkexpr(res));
9780       if (!isD) {
9781          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9782       }
9783       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
9784       const HChar ch = isD ? 'd' : 's';
9785       DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9786           ch, dd, ch, nn, fbits);
9787       return True;
9788    }
9789 
9791    return False;
9792 #  undef INSN
9793 }
9794 
9795 
9796 static
9797 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9798 {
9799    /* 31 29 28    23   21 20 15     11 9 4
9800       01 U  11110 size 1  m  opcode 00 n d
9801       Decode fields: u,opcode
9802    */
9803 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9804    if (INSN(31,30) != BITS2(0,1)
9805        || INSN(28,24) != BITS5(1,1,1,1,0)
9806        || INSN(21,21) != 1
9807        || INSN(11,10) != BITS2(0,0)) {
9808       return False;
9809    }
9810    UInt bitU   = INSN(29,29);
9811    UInt size   = INSN(23,22);
9812    UInt mm     = INSN(20,16);
9813    UInt opcode = INSN(15,12);
9814    UInt nn     = INSN(9,5);
9815    UInt dd     = INSN(4,0);
9816    vassert(size < 4);
9817 
9818    if (bitU == 0
9819        && (opcode == BITS4(1,1,0,1)
9820            || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9821       /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
9822       /* -------- 0,1001  SQDMLAL -------- */ // 1
9823       /* -------- 0,1011  SQDMLSL -------- */ // 2
9824       /* Widens, and size refers to the narrowed lanes. */
9825       UInt ks = 3;
9826       switch (opcode) {
9827          case BITS4(1,1,0,1): ks = 0; break;
9828          case BITS4(1,0,0,1): ks = 1; break;
9829          case BITS4(1,0,1,1): ks = 2; break;
9830          default: vassert(0);
9831       }
9832       vassert(ks >= 0 && ks <= 2);
9833       if (size == X00 || size == X11) return False;
9834       vassert(size <= 2);
9835       IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9836       vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9837       newTempsV128_3(&vecN, &vecM, &vecD);
9838       assign(vecN, getQReg128(nn));
9839       assign(vecM, getQReg128(mm));
9840       assign(vecD, getQReg128(dd));
9841       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9842                        False/*!is2*/, size, "mas"[ks],
9843                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9844       IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9845       putQReg128(dd, unop(opZHI, mkexpr(res)));
9846       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9847       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9848       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9849          updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9850       }
9851       const HChar* nm        = ks == 0 ? "sqdmull"
9852                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9853       const HChar  arrNarrow = "bhsd"[size];
9854       const HChar  arrWide   = "bhsd"[size+1];
9855       DIP("%s %c%u, %c%u, %c%u\n",
9856           nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9857       return True;
9858    }
9859 
9860    return False;
9861 #  undef INSN
9862 }
9863 
9864 
9865 static
9866 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9867 {
9868    /* 31 29 28    23   21 20 15     10 9 4
9869       01 U  11110 size 1  m  opcode 1  n d
9870       Decode fields: u,size,opcode
9871    */
9872 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
9873    if (INSN(31,30) != BITS2(0,1)
9874        || INSN(28,24) != BITS5(1,1,1,1,0)
9875        || INSN(21,21) != 1
9876        || INSN(10,10) != 1) {
9877       return False;
9878    }
9879    UInt bitU   = INSN(29,29);
9880    UInt size   = INSN(23,22);
9881    UInt mm     = INSN(20,16);
9882    UInt opcode = INSN(15,11);
9883    UInt nn     = INSN(9,5);
9884    UInt dd     = INSN(4,0);
9885    vassert(size < 4);
9886 
9887    if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9888       /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9889       /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9890       /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9891       /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9892       Bool isADD = opcode == BITS5(0,0,0,0,1);
9893       Bool isU   = bitU == 1;
9894       IROp qop   = Iop_INVALID;
9895       IROp nop   = Iop_INVALID;
9896       if (isADD) {
9897          qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9898          nop = mkVecADD(size);
9899       } else {
9900          qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9901          nop = mkVecSUB(size);
9902       }
9903       IRTemp argL = newTempV128();
9904       IRTemp argR = newTempV128();
9905       IRTemp qres = newTempV128();
9906       IRTemp nres = newTempV128();
9907       assign(argL, getQReg128(nn));
9908       assign(argR, getQReg128(mm));
9909       assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9910                              size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9911       assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9912                              size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9913       putQReg128(dd, mkexpr(qres));
9914       updateQCFLAGwithDifference(qres, nres);
9915       const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
9916                                : (isU ? "uqsub" : "sqsub");
9917       const HChar  arr = "bhsd"[size];
9918       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9919       return True;
9920    }
9921 
9922    if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9923       /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9924       /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9925       Bool    isGT = bitU == 0;
9926       IRExpr* argL = getQReg128(nn);
9927       IRExpr* argR = getQReg128(mm);
9928       IRTemp  res  = newTempV128();
9929       assign(res,
9930              isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9931                   : binop(Iop_CmpGT64Ux2, argL, argR));
9932       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9933       DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
9934           nameQRegLO(dd, Ity_I64),
9935           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9936       return True;
9937    }
9938 
9939    if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9940       /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9941       /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9942       Bool    isGE = bitU == 0;
9943       IRExpr* argL = getQReg128(nn);
9944       IRExpr* argR = getQReg128(mm);
9945       IRTemp  res  = newTempV128();
9946       assign(res,
9947              isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9948                   : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9949       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9950       DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9951           nameQRegLO(dd, Ity_I64),
9952           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9953       return True;
9954    }
9955 
9956    if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9957                        || opcode == BITS5(0,1,0,1,0))) {
9958       /* -------- 0,xx,01000 SSHL  d_d_d -------- */
9959       /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9960       /* -------- 1,xx,01000 USHL  d_d_d -------- */
9961       /* -------- 1,xx,01010 URSHL d_d_d -------- */
9962       Bool isU = bitU == 1;
9963       Bool isR = opcode == BITS5(0,1,0,1,0);
9964       IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9965                      : (isU ? mkVecSHU(size)  : mkVecSHS(size));
9966       IRTemp res = newTempV128();
9967       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9968       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9969       const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
9970                              : (isU ? "ushl"  : "sshl");
9971       DIP("%s %s, %s, %s\n", nm,
9972           nameQRegLO(dd, Ity_I64),
9973           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9974       return True;
9975    }
9976 
9977    if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9978       /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
9979       /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9980       /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
9981       /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9982       Bool isU = bitU == 1;
9983       Bool isR = opcode == BITS5(0,1,0,1,1);
9984       IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9985                      : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
9986       /* This is a bit tricky.  Since we're only interested in the lowest
9987          lane of the result, we zero out all the rest in the operands, so
9988          as to ensure that other lanes don't pollute the returned Q value.
9989          This works because it means, for the lanes we don't care about, we
9990          are shifting zero by zero, which can never saturate. */
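      /* Result layout (an assumption matching the V256toV128_0/_1
         selections below): the lower V128 of res256 is the shifted
         value; the upper V128 carries saturation-difference bits that
         feed the QC update. */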
9991       IRTemp res256 = newTemp(Ity_V256);
9992       IRTemp resSH  = newTempV128();
9993       IRTemp resQ   = newTempV128();
9994       IRTemp zero   = newTempV128();
9995       assign(
9996          res256,
9997          binop(op,
9998                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9999                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10000       assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10001       assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
10002       assign(zero,  mkV128(0x0000));
10003       putQReg128(dd, mkexpr(resSH));
10004       updateQCFLAGwithDifference(resQ, zero);
10005       const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
10006                              : (isU ? "uqshl"  : "sqshl");
10007       const HChar  arr = "bhsd"[size];
10008       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10009       return True;
10010    }
10011 
10012    if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10013       /* -------- 0,11,10000 ADD d_d_d -------- */
10014       /* -------- 1,11,10000 SUB d_d_d -------- */
10015       Bool   isSUB = bitU == 1;
10016       IRTemp res   = newTemp(Ity_I64);
10017       assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10018                         getQRegLane(nn, 0, Ity_I64),
10019                         getQRegLane(mm, 0, Ity_I64)));
10020       putQRegLane(dd, 0, mkexpr(res));
10021       putQRegLane(dd, 1, mkU64(0));
10022       DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10023           nameQRegLO(dd, Ity_I64),
10024           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10025       return True;
10026    }
10027 
10028    if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10029       /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10030       /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
10031       Bool    isEQ = bitU == 1;
10032       IRExpr* argL = getQReg128(nn);
10033       IRExpr* argR = getQReg128(mm);
10034       IRTemp  res  = newTempV128();
10035       assign(res,
10036              isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10037                   : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10038                                             binop(Iop_AndV128, argL, argR),
10039                                             mkV128(0x0000))));
10040       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10041       DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10042           nameQRegLO(dd, Ity_I64),
10043           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10044       return True;
10045    }
10046 
10047    if (opcode == BITS5(1,0,1,1,0)) {
10048       /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10049       /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10050       if (size == X00 || size == X11) return False;
10051       Bool isR = bitU == 1;
10052       IRTemp res, sat1q, sat1n, vN, vM;
10053       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10054       newTempsV128_2(&vN, &vM);
10055       assign(vN, getQReg128(nn));
10056       assign(vM, getQReg128(mm));
10057       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10058       putQReg128(dd,
10059                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10060       updateQCFLAGwithDifference(
10061          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10062          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10063       const HChar  arr = "bhsd"[size];
10064       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
10065       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10066       return True;
10067    }
10068 
10069    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10070       /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10071       IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10072       IRTemp res = newTemp(ity);
10073       assign(res, unop(mkABSF(ity),
10074                        triop(mkSUBF(ity),
10075                              mkexpr(mk_get_IR_rounding_mode()),
10076                              getQRegLO(nn,ity), getQRegLO(mm,ity))));
10077       putQReg128(dd, mkV128(0x0000));
10078       putQRegLO(dd, mkexpr(res));
10079       DIP("fabd %s, %s, %s\n",
10080           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10081       return True;
10082    }
10083 
10084    if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10085       /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10086       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
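      // (FMULX's actual difference: (+/-)0 * (+/-)Inf returns (+/-)2.0
      // instead of a default NaN; everything else behaves like FMUL.)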
10087       IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10088       IRTemp res = newTemp(ity);
10089       assign(res, triop(mkMULF(ity),
10090                         mkexpr(mk_get_IR_rounding_mode()),
10091                         getQRegLO(nn,ity), getQRegLO(mm,ity)));
10092       putQReg128(dd, mkV128(0x0000));
10093       putQRegLO(dd, mkexpr(res));
10094       DIP("fmulx %s, %s, %s\n",
10095           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10096       return True;
10097    }
10098 
10099    if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10100       /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10101       /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10102       Bool   isD   = size == X01;
10103       IRType ity   = isD ? Ity_F64 : Ity_F32;
10104       Bool   isGE  = bitU == 1;
10105       IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10106                           : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
10107       IRTemp res   = newTempV128();
10108       assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10109                        : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10110       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10111                                                              mkexpr(res))));
10112       DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10113           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10114       return True;
10115    }
10116 
10117    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10118       /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10119       Bool   isD   = size == X11;
10120       IRType ity   = isD ? Ity_F64 : Ity_F32;
10121       IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10122       IRTemp res   = newTempV128();
10123       assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10124       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10125                                                              mkexpr(res))));
10126       DIP("%s %s, %s, %s\n", "fcmgt",
10127           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10128       return True;
10129    }
10130 
10131    if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10132       /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10133       /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10134       Bool   isD   = (size & 1) == 1;
10135       IRType ity   = isD ? Ity_F64 : Ity_F32;
10136       Bool   isGT  = (size & 2) == 2;
10137       IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10138                           : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10139       IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
10140       IRTemp res   = newTempV128();
10141       assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10142                                unop(opABS, getQReg128(nn)))); // swapd
10143       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10144                                                              mkexpr(res))));
10145       DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10146           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10147       return True;
10148    }
10149 
10150    if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10151       /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
10152       /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10153       Bool isSQRT = (size & 2) == 2;
10154       Bool isD    = (size & 1) == 1;
10155       IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10156                            : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10157       IRTemp res = newTempV128();
10158       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10159       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10160                                                              mkexpr(res))));
10161       HChar c = isD ? 'd' : 's';
10162       DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10163           c, dd, c, nn, c, mm);
10164       return True;
10165    }
10166 
10167    return False;
10168 #  undef INSN
10169 }
10170 
10171 
10172 static
10173 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10174 {
10175    /* 31 29 28    23   21    16     11 9 4
10176       01 U  11110 size 10000 opcode 10 n d
10177       Decode fields: u,size,opcode
10178    */
10179 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10180    if (INSN(31,30) != BITS2(0,1)
10181        || INSN(28,24) != BITS5(1,1,1,1,0)
10182        || INSN(21,17) != BITS5(1,0,0,0,0)
10183        || INSN(11,10) != BITS2(1,0)) {
10184       return False;
10185    }
10186    UInt bitU   = INSN(29,29);
10187    UInt size   = INSN(23,22);
10188    UInt opcode = INSN(16,12);
10189    UInt nn     = INSN(9,5);
10190    UInt dd     = INSN(4,0);
10191    vassert(size < 4);
10192 
10193    if (opcode == BITS5(0,0,0,1,1)) {
10194       /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10195       /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10196       /* These are a bit tricky (to say the least).  See comments on
10197          the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10198          details. */
10199       Bool   isUSQADD = bitU == 1;
10200       IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10201                              : mkVecQADDEXTUSSATSS(size);
10202       IROp   nop  = mkVecADD(size);
10203       IRTemp argL = newTempV128();
10204       IRTemp argR = newTempV128();
10205       assign(argL, getQReg128(nn));
10206       assign(argR, getQReg128(dd));
10207       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10208                        size, binop(qop, mkexpr(argL), mkexpr(argR)));
10209       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10210                        size, binop(nop, mkexpr(argL), mkexpr(argR)));
10211       putQReg128(dd, mkexpr(qres));
10212       updateQCFLAGwithDifference(qres, nres);
10213       const HChar arr = "bhsd"[size];
10214       DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10215       return True;
10216    }
10217 
10218    if (opcode == BITS5(0,0,1,1,1)) {
10219       /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10220       /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10221       Bool isNEG = bitU == 1;
10222       IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10223       (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10224                                          getQReg128(nn), size );
10225       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10226       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10227       putQReg128(dd, mkexpr(qres));
10228       updateQCFLAGwithDifference(qres, nres);
10229       const HChar arr = "bhsd"[size];
10230       DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10231       return True;
10232    }
10233 
10234    if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10235       /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10236       /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10237       Bool    isGT = bitU == 0;
10238       IRExpr* argL = getQReg128(nn);
10239       IRExpr* argR = mkV128(0x0000);
10240       IRTemp  res  = newTempV128();
10241       assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10242                        : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10243       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10244       DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10245       return True;
10246    }
10247 
10248    if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10249       /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10250       /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10251       Bool    isEQ = bitU == 0;
10252       IRExpr* argL = getQReg128(nn);
10253       IRExpr* argR = mkV128(0x0000);
10254       IRTemp  res  = newTempV128();
10255       assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10256                        : unop(Iop_NotV128,
10257                               binop(Iop_CmpGT64Sx2, argL, argR)));
10258       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10259       DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10260       return True;
10261    }
10262 
10263    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10264       /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10265       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10266                           binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10267                                                 getQReg128(nn))));
10268       DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10269       return True;
10270    }
10271 
10272    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10273       /* -------- 0,11,01011 ABS d_d -------- */
10274       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10275                           unop(Iop_Abs64x2, getQReg128(nn))));
10276       DIP("abs d%u, d%u\n", dd, nn);
10277       return True;
10278    }
10279 
10280    if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10281       /* -------- 1,11,01011 NEG d_d -------- */
10282       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10283                           binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10284       DIP("neg d%u, d%u\n", dd, nn);
10285       return True;
10286    }
10287 
10288    UInt ix = 0; /*INVALID*/
10289    if (size >= X10) {
10290       switch (opcode) {
10291          case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10292          case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10293          case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10294          default: break;
10295       }
10296    }
10297    if (ix > 0) {
10298       /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10299       /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10300       /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10301       /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10302       /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10303       Bool   isD     = size == X11;
10304       IRType ity     = isD ? Ity_F64 : Ity_F32;
10305       IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10306       IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10307       IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10308       IROp   opCmp   = Iop_INVALID;
10309       Bool   swap    = False;
10310       const HChar* nm = "??";
10311       switch (ix) {
10312          case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10313          case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10314          case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10315          case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10316          case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10317          default: vassert(0);
10318       }
10319       IRExpr* zero = mkV128(0x0000);
10320       IRTemp res = newTempV128();
10321       assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10322                        : binop(opCmp, getQReg128(nn), zero));
10323       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10324                                                              mkexpr(res))));
10325 
10326       DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10327       return True;
10328    }
10329 
10330    if (opcode == BITS5(1,0,1,0,0)
10331        || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10332       /* -------- 0,xx,10100: SQXTN -------- */
10333       /* -------- 1,xx,10100: UQXTN -------- */
10334       /* -------- 1,xx,10010: SQXTUN -------- */
10335       if (size == X11) return False;
10336       vassert(size < 3);
10337       IROp  opN    = Iop_INVALID;
10338       Bool  zWiden = True;
10339       const HChar* nm = "??";
10340       /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10341          opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10342       }
10343       else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10344          opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10345       }
10346       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10347          opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10348       }
10349       else vassert(0);
10350       IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10351                        size+1, getQReg128(nn));
10352       IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10353                        size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10354       putQReg128(dd, mkexpr(resN));
10355       /* This widens zero lanes to zero, and compares it against zero, so all
10356          of the non-participating lanes make no contribution to the
10357          Q flag state. */
10358       IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10359                                               size, mkexpr(resN));
10360       updateQCFLAGwithDifference(src, resW);
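      /* Worked example (sqxtn b from h): lane value 0x0234 (564)
         saturates to 0x7F; sign-widening 0x7F gives 0x007F != 0x0234,
         so QC gets set.  An in-range value round-trips unchanged and
         leaves QC alone. */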
10361       const HChar arrNarrow = "bhsd"[size];
10362       const HChar arrWide   = "bhsd"[size+1];
10363       DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10364       return True;
10365    }
10366 
10367    if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10368       /* -------- 1,01,10110 FCVTXN s_d -------- */
10369       /* Using Irrm_NEAREST here isn't right.  "Round to odd" truncates
10370          and then sets the result's LSB if the conversion was inexact. */
10371       putQRegLO(dd,
10372                 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10373                                     getQRegLO(nn, Ity_F64)));
10374       putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10375       putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10376       DIP("fcvtxn s%u, d%u\n", dd, nn);
10377       return True;
10378    }
10379 
10380    ix = 0; /*INVALID*/
10381    switch (opcode) {
10382       case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10383       case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10384       case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10385       default: break;
10386    }
10387    if (ix > 0) {
10388       /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10389       /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10390       /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10391       /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10392       /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10393       /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10394       /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10395       /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10396       /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10397       /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10398       Bool           isD  = (size & 1) == 1;
10399       IRType         tyF  = isD ? Ity_F64 : Ity_F32;
10400       IRType         tyI  = isD ? Ity_I64 : Ity_I32;
10401       IRRoundingMode irrm = 8; /*impossible*/
10402       HChar          ch   = '?';
10403       switch (ix) {
10404          case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10405          case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
10406          case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge: ties-away-from-zero is unavailable in IR */
10407          case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
10408          case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
10409          default: vassert(0);
10410       }
10411       IROp cvt = Iop_INVALID;
10412       if (bitU == 1) {
10413          cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10414       } else {
10415          cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10416       }
10417       IRTemp src = newTemp(tyF);
10418       IRTemp res = newTemp(tyI);
10419       assign(src, getQRegLane(nn, 0, tyF));
10420       assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10421       putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10422       if (!isD) {
10423          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10424       }
10425       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10426       HChar sOrD = isD ? 'd' : 's';
10427       DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10428           sOrD, dd, sOrD, nn);
10429       return True;
10430    }
10431 
10432    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10433       /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10434       /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10435       Bool   isU = bitU == 1;
10436       Bool   isD = (size & 1) == 1;
10437       IRType tyI = isD ? Ity_I64 : Ity_I32;
10438       IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10439                        : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10440       IRTemp rm  = mk_get_IR_rounding_mode();
10441       putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10442       if (!isD) {
10443          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10444       }
10445       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
10446       HChar c = isD ? 'd' : 's';
10447       DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10448       return True;
10449    }
10450 
10451    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10452       /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
10453       /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10454       Bool isSQRT = bitU == 1;
10455       Bool isD    = (size & 1) == 1;
10456       IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10457                            : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10458       IRTemp resV = newTempV128();
10459       assign(resV, unop(op, getQReg128(nn)));
10460       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10461                                                              mkexpr(resV))));
10462       HChar c = isD ? 'd' : 's';
10463       DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10464       return True;
10465    }
10466 
10467    if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10468       /* -------- 0,1x,11111: FRECPX  d_d, s_s -------- */
10469       Bool   isD = (size & 1) == 1;
10470       IRType ty  = isD ? Ity_F64 : Ity_F32;
10471       IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10472       IRTemp res = newTemp(ty);
10473       IRTemp rm  = mk_get_IR_rounding_mode();
10474       assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10475       putQReg128(dd, mkV128(0x0000));
10476       putQRegLane(dd, 0, mkexpr(res));
10477       HChar c = isD ? 'd' : 's';
10478       DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10479       return True;
10480    }
10481 
10482    return False;
10483 #  undef INSN
10484 }
10485 
10486 
10487 static
10488 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10489 {
10490    /* 31   28    23   21 20 19 15     11   9 4
10491       01 U 11111 size L  M  m  opcode H  0 n d
10492       Decode fields are: u,size,opcode
10493       M is really part of the mm register number.  Individual
10494       cases need to inspect L and H though.
10495    */
10496 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10497    if (INSN(31,30) != BITS2(0,1)
10498        || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10499       return False;
10500    }
10501    UInt bitU   = INSN(29,29);
10502    UInt size   = INSN(23,22);
10503    UInt bitL   = INSN(21,21);
10504    UInt bitM   = INSN(20,20);
10505    UInt mmLO4  = INSN(19,16);
10506    UInt opcode = INSN(15,12);
10507    UInt bitH   = INSN(11,11);
10508    UInt nn     = INSN(9,5);
10509    UInt dd     = INSN(4,0);
10510    vassert(size < 4);
10511    vassert(bitH < 2 && bitM < 2 && bitL < 2);
10512 
10513    if (bitU == 0 && size >= X10
10514        && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10515       /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10516       /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10517       Bool isD   = (size & 1) == 1;
10518       Bool isSUB = opcode == BITS4(0,1,0,1);
10519       UInt index;
10520       if      (!isD)             index = (bitH << 1) | bitL;
10521       else if (isD && bitL == 0) index = bitH;
10522       else return False; // sz:L == x11 => unallocated encoding
10523       vassert(index < (isD ? 2 : 4));
10524       IRType ity   = isD ? Ity_F64 : Ity_F32;
10525       IRTemp elem  = newTemp(ity);
10526       UInt   mm    = (bitM << 4) | mmLO4;
10527       assign(elem, getQRegLane(mm, index, ity));
10528       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10529       IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10530       IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10531       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10532       IRTemp rm    = mk_get_IR_rounding_mode();
10533       IRTemp t1    = newTempV128();
10534       IRTemp t2    = newTempV128();
10535       // FIXME: double rounding; use FMA primops instead
10536       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10537       assign(t2, triop(isSUB ? opSUB : opADD,
10538                        mkexpr(rm), getQReg128(dd), mkexpr(t1)));
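      // A possible fix for the double rounding (a sketch, assuming the
      // scalar fused primops Iop_MAddF64/Iop_MAddF32 with signature
      // qop(op, rm, x, y, z) == x*y+z): only lane 0 survives, so
      //    qop(Iop_MAddF64, mkexpr(rm), getQRegLane(nn, 0, ity),
      //        mkexpr(elem), getQRegLane(dd, 0, ity))
      // would compute FMLA fused; FMLS would negate 'elem' first.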
10539       putQReg128(dd,
10540                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10541                                                          mkexpr(t2))));
10542       const HChar c = isD ? 'd' : 's';
10543       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10544           c, dd, c, nn, nameQReg128(mm), c, index);
10545       return True;
10546    }
10547 
10548    if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10549       /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
10550       /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10551       Bool isD    = (size & 1) == 1;
10552       Bool isMULX = bitU == 1;
10553       UInt index;
10554       if      (!isD)             index = (bitH << 1) | bitL;
10555       else if (isD && bitL == 0) index = bitH;
10556       else return False; // sz:L == x11 => unallocated encoding
10557       vassert(index < (isD ? 2 : 4));
10558       IRType ity   = isD ? Ity_F64 : Ity_F32;
10559       IRTemp elem  = newTemp(ity);
10560       UInt   mm    = (bitM << 4) | mmLO4;
10561       assign(elem, getQRegLane(mm, index, ity));
10562       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
10563       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10564       IRTemp rm    = mk_get_IR_rounding_mode();
10565       IRTemp t1    = newTempV128();
10566       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
10567       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10568       putQReg128(dd,
10569                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10570                                                          mkexpr(t1))));
10571       const HChar c = isD ? 'd' : 's';
10572       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10573           c, dd, c, nn, nameQReg128(mm), c, index);
10574       return True;
10575    }
10576 
10577    if (bitU == 0
10578        && (opcode == BITS4(1,0,1,1)
10579            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10580       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10581       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10582       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10583       /* Widens, and size refers to the narrowed lanes. */
10584       UInt ks = 3;
10585       switch (opcode) {
10586          case BITS4(1,0,1,1): ks = 0; break;
10587          case BITS4(0,0,1,1): ks = 1; break;
10588          case BITS4(0,1,1,1): ks = 2; break;
10589          default: vassert(0);
10590       }
10591       vassert(ks >= 0 && ks <= 2);
10592       UInt mm  = 32; // invalid
10593       UInt ix  = 16; // invalid
10594       switch (size) {
10595          case X00:
10596             return False; // h_b_b[] case is not allowed
10597          case X01:
10598             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10599          case X10:
10600             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10601          case X11:
10602             return False; // q_d_d[] case is not allowed
10603          default:
10604             vassert(0);
10605       }
10606       vassert(mm < 32 && ix < 16);
10607       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10608       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10609       newTempsV128_2(&vecN, &vecD);
10610       assign(vecN, getQReg128(nn));
10611       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10612       assign(vecD, getQReg128(dd));
10613       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10614                        False/*!is2*/, size, "mas"[ks],
10615                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10616       IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10617       putQReg128(dd, unop(opZHI, mkexpr(res)));
10618       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10619       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10620       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10621          updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10622       }
10623       const HChar* nm        = ks == 0 ? "sqdmull"
10624                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10625       const HChar  arrNarrow = "bhsd"[size];
10626       const HChar  arrWide   = "bhsd"[size+1];
10627       DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10628           nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10629       return True;
10630    }
10631 
10632    if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10633       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10634       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10635       UInt mm  = 32; // invalid
10636       UInt ix  = 16; // invalid
10637       switch (size) {
10638          case X00:
10639             return False; // b case is not allowed
10640          case X01:
10641             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10642          case X10:
10643             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10644          case X11:
10645             return False; // q case is not allowed
10646          default:
10647             vassert(0);
10648       }
10649       vassert(mm < 32 && ix < 16);
10650       Bool isR = opcode == BITS4(1,1,0,1);
10651       IRTemp res, sat1q, sat1n, vN, vM;
10652       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10653       vN = newTempV128();
10654       assign(vN, getQReg128(nn));
10655       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10656       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10657       IROp opZHI = mkVecZEROHIxxOFV128(size);
10658       putQReg128(dd, unop(opZHI, mkexpr(res)));
10659       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10660       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
10661       HChar ch         = size == X01 ? 'h' : 's';
10662       DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10663       return True;
10664    }
10665 
10666    return False;
10667 #  undef INSN
10668 }
10669 
10670 
10671 static
10672 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10673 {
10674    /* 31    28     22   18   15     10 9 4
10675       0 q u 011110 immh immb opcode 1  n d
10676       Decode fields: u,opcode
10677    */
10678 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
10679    if (INSN(31,31) != 0
10680        || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10681       return False;
10682    }
10683    UInt bitQ   = INSN(30,30);
10684    UInt bitU   = INSN(29,29);
10685    UInt immh   = INSN(22,19);
10686    UInt immb   = INSN(18,16);
10687    UInt opcode = INSN(15,11);
10688    UInt nn     = INSN(9,5);
10689    UInt dd     = INSN(4,0);
10690 
10691    if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10692       /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10693       /* -------- 1,00000 USHR std7_std7_#imm -------- */
10694       /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10695       /* -------- 1,00010 USRA std7_std7_#imm -------- */
10696       /* laneTy, shift = case immh:immb of
10697                          0001:xxx -> B, SHR:8-xxx
10698                          001x:xxx -> H, SHR:16-xxxx
10699                          01xx:xxx -> S, SHR:32-xxxxx
10700                          1xxx:xxx -> D, SHR:64-xxxxxx
10701                          other    -> invalid
10702       */
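      /* Worked example (illustrative only): immh:immb = 0011:010
         matches the 001x:xxx row, so laneTy = H and the xxxx field is
         1:010 = 10, giving shift = 16 - 10 = 6; with Q=1 this decodes
         as "sshr Vd.8h, Vn.8h, #6". */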
10703       UInt size  = 0;
10704       UInt shift = 0;
10705       Bool isQ   = bitQ == 1;
10706       Bool isU   = bitU == 1;
10707       Bool isAcc = opcode == BITS5(0,0,0,1,0);
10708       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10709       if (!ok || (bitQ == 0 && size == X11)) return False;
10710       vassert(size >= 0 && size <= 3);
10711       UInt lanebits = 8 << size;
10712       vassert(shift >= 1 && shift <= lanebits);
10713       IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
10714       IRExpr* src = getQReg128(nn);
10715       IRTemp  shf = newTempV128();
10716       IRTemp  res = newTempV128();
10717       if (shift == lanebits && isU) {
10718          assign(shf, mkV128(0x0000));
10719       } else {
10720          UInt nudge = 0;
10721          if (shift == lanebits) {
10722             vassert(!isU);
10723             nudge = 1;
10724          }
10725          assign(shf, binop(op, src, mkU8(shift - nudge)));
10726       }
10727       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10728                         : mkexpr(shf));
10729       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10730       HChar laneCh = "bhsd"[size];
10731       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10732       const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10733                               : (isU ? "ushr" : "sshr");
10734       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10735           nameQReg128(dd), nLanes, laneCh,
10736           nameQReg128(nn), nLanes, laneCh, shift);
10737       return True;
10738    }
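
   /* A sketch of why the "nudge" above is sound (illustrative only,
      not part of the decoder; the helper name is made up).  Iop_SarN
      only accepts shift amounts 0..lanebits-1, but SSHR #lanebits is
      encodable.  For arithmetic shifts the two agree, e.g. on one
      signed 8-bit lane:

         signed char sshr_lane ( signed char x, unsigned sh ) {
            if (sh == 8) sh = 7;  // nudge: bits are all sign copies anyway
            return (signed char)(x >> sh);
         }

      since x >> 7 already replicates the sign bit into every bit
      position.  The unsigned case USHR #lanebits is instead
      special-cased above to an all-zeroes result. */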
10739 
10740    if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10741       /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10742       /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10743       /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10744       /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10745       /* laneTy, shift = case immh:immb of
10746                          0001:xxx -> B, SHR:8-xxx
10747                          001x:xxx -> H, SHR:16-xxxx
10748                          01xx:xxx -> S, SHR:32-xxxxx
10749                          1xxx:xxx -> D, SHR:64-xxxxxx
10750                          other    -> invalid
10751       */
10752       UInt size  = 0;
10753       UInt shift = 0;
10754       Bool isQ   = bitQ == 1;
10755       Bool isU   = bitU == 1;
10756       Bool isAcc = opcode == BITS5(0,0,1,1,0);
10757       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10758       if (!ok || (bitQ == 0 && size == X11)) return False;
10759       vassert(size >= 0 && size <= 3);
10760       UInt lanebits = 8 << size;
10761       vassert(shift >= 1 && shift <= lanebits);
10762       IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10763       IRExpr* src  = getQReg128(nn);
10764       IRTemp  imm8 = newTemp(Ity_I8);
10765       assign(imm8, mkU8((UChar)(-shift)));
10766       IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10767       IRTemp  shf  = newTempV128();
10768       IRTemp  res  = newTempV128();
10769       assign(shf, binop(op, src, amt));
10770       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10771                         : mkexpr(shf));
10772       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10773       HChar laneCh = "bhsd"[size];
10774       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10775       const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10776                               : (isU ? "urshr" : "srshr");
10777       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10778           nameQReg128(dd), nLanes, laneCh,
10779           nameQReg128(nn), nLanes, laneCh, shift);
10780       return True;
10781    }
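
   /* Reference semantics for the rounding right shifts (illustrative
      only; helper name made up, and valid for lanes of up to 32 bits
      so that the 64-bit accumulator cannot overflow):

         unsigned long long urshr_lane ( unsigned v, unsigned sh ) {
            // add the rounding constant 1 << (sh-1) before shifting
            return ((unsigned long long)v + (1ULL << (sh-1))) >> sh;
         }

      The IR above instead hands a per-lane count of -shift to the
      vector rounding-shift ops, whose convention is that a negative
      count shifts right. */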
10782 
10783    if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10784       /* -------- 1,01000 SRI std7_std7_#imm -------- */
10785       /* laneTy, shift = case immh:immb of
10786                          0001:xxx -> B, SHR:8-xxx
10787                          001x:xxx -> H, SHR:16-xxxx
10788                          01xx:xxx -> S, SHR:32-xxxxx
10789                          1xxx:xxx -> D, SHR:64-xxxxxx
10790                          other    -> invalid
10791       */
10792       UInt size  = 0;
10793       UInt shift = 0;
10794       Bool isQ   = bitQ == 1;
10795       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10796       if (!ok || (bitQ == 0 && size == X11)) return False;
10797       vassert(size >= 0 && size <= 3);
10798       UInt lanebits = 8 << size;
10799       vassert(shift >= 1 && shift <= lanebits);
10800       IRExpr* src = getQReg128(nn);
10801       IRTemp  res = newTempV128();
10802       if (shift == lanebits) {
10803          assign(res, getQReg128(dd));
10804       } else {
10805          assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10806          IRExpr* nmask = binop(mkVecSHLN(size),
10807                                mkV128(0xFFFF), mkU8(lanebits - shift));
10808          IRTemp  tmp   = newTempV128();
10809          assign(tmp, binop(Iop_OrV128,
10810                            mkexpr(res),
10811                            binop(Iop_AndV128, getQReg128(dd), nmask)));
10812          res = tmp;
10813       }
10814       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10815       HChar laneCh = "bhsd"[size];
10816       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10817       DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10818           nameQReg128(dd), nLanes, laneCh,
10819           nameQReg128(nn), nLanes, laneCh, shift);
10820       return True;
10821    }
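
   /* Worked example for the SRI masking (illustrative only): with B
      lanes and shift = 3, nmask is 0xFF << 5 = 0xE0 per lane, so each
      destination lane becomes (Vn_lane >> 3) | (Vd_lane & 0xE0): the
      top 3 bits of the destination are preserved and the rest is
      filled from the shifted source, which is the "shift right and
      insert" behaviour. */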
10822 
10823    if (opcode == BITS5(0,1,0,1,0)) {
10824       /* -------- 0,01010 SHL std7_std7_#imm -------- */
10825       /* -------- 1,01010 SLI std7_std7_#imm -------- */
10826       /* laneTy, shift = case immh:immb of
10827                          0001:xxx -> B, xxx
10828                          001x:xxx -> H, xxxx
10829                          01xx:xxx -> S, xxxxx
10830                          1xxx:xxx -> D, xxxxxx
10831                          other    -> invalid
10832       */
10833       UInt size  = 0;
10834       UInt shift = 0;
10835       Bool isSLI = bitU == 1;
10836       Bool isQ   = bitQ == 1;
10837       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10838       if (!ok || (bitQ == 0 && size == X11)) return False;
10839       vassert(size >= 0 && size <= 3);
10840       /* The shift encoding has opposite sign for the leftwards case.
10841          Adjust shift to compensate. */
10842       UInt lanebits = 8 << size;
10843       shift = lanebits - shift;
10844       vassert(shift >= 0 && shift < lanebits);
10845       IROp    op  = mkVecSHLN(size);
10846       IRExpr* src = getQReg128(nn);
10847       IRTemp  res = newTempV128();
10848       if (shift == 0) {
10849          assign(res, src);
10850       } else {
10851          assign(res, binop(op, src, mkU8(shift)));
10852          if (isSLI) {
10853             IRExpr* nmask = binop(mkVecSHRN(size),
10854                                   mkV128(0xFFFF), mkU8(lanebits - shift));
10855             IRTemp  tmp   = newTempV128();
10856             assign(tmp, binop(Iop_OrV128,
10857                               mkexpr(res),
10858                               binop(Iop_AndV128, getQReg128(dd), nmask)));
10859             res = tmp;
10860          }
10861       }
10862       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10863       HChar laneCh = "bhsd"[size];
10864       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
10865       const HChar* nm = isSLI ? "sli" : "shl";
10866       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10867           nameQReg128(dd), nLanes, laneCh,
10868           nameQReg128(nn), nLanes, laneCh, shift);
10869       return True;
10870    }
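
   /* Worked example for the left-shift re-encoding (illustrative
      only): "shl Vd.8b, Vn.8b, #3" is encoded with immh:immb =
      0001:011 = 11 = 8 + 3.  getLaneInfo_IMMH_IMMB returns the
      right-shift view 16 - 11 = 5, and the adjustment above recovers
      8 - 5 = 3.  For SLI the nmask is ones >> (lanebits - shift), so
      the low 'shift' bits of each destination lane are preserved. */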
10871 
10872    if (opcode == BITS5(0,1,1,1,0)
10873        || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10874       /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
10875       /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
10876       /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
10877       UInt size  = 0;
10878       UInt shift = 0;
10879       Bool isQ   = bitQ == 1;
10880       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10881       if (!ok || (bitQ == 0 && size == X11)) return False;
10882       vassert(size >= 0 && size <= 3);
10883       /* The shift encoding has opposite sign for the leftwards case.
10884          Adjust shift to compensate. */
10885       UInt lanebits = 8 << size;
10886       shift = lanebits - shift;
10887       vassert(shift >= 0 && shift < lanebits);
10888       const HChar* nm = NULL;
10889       /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10890       else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10891       else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10892       else vassert(0);
10893       IRTemp qDiff1 = IRTemp_INVALID;
10894       IRTemp qDiff2 = IRTemp_INVALID;
10895       IRTemp res = IRTemp_INVALID;
10896       IRTemp src = newTempV128();
10897       assign(src, getQReg128(nn));
10898       math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10899       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10900       updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10901                                     isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10902       const HChar* arr = nameArr_Q_SZ(bitQ, size);
10903       DIP("%s %s.%s, %s.%s, #%u\n", nm,
10904           nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10905       return True;
10906    }
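
   /* Reference semantics sketch for the saturating left shifts
      (illustrative only; helper name made up, 'qc' is an out
      parameter, and lanes of up to 32 bits are assumed).  UQSHL
      saturates when any set bit would be shifted out:

         unsigned uqshl_lane ( unsigned x, unsigned sh, unsigned bits,
                               int* qc ) {
            unsigned long long wide = (unsigned long long)x << sh;
            unsigned long long max  = (1ULL << bits) - 1;
            *qc |= wide > max;              // QC is set iff we clamp
            return (unsigned)(wide > max ? max : wide);
         }

      math_QSHL_IMM produces both the saturated and the plain shifted
      value; the QC update above fires on any difference between the
      two. */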
10907 
10908    if (bitU == 0
10909        && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10910       /* -------- 0,10000  SHRN{,2} #imm -------- */
10911       /* -------- 0,10001 RSHRN{,2} #imm -------- */
10912       /* Narrows, and size is the narrow size. */
10913       UInt size  = 0;
10914       UInt shift = 0;
10915       Bool is2   = bitQ == 1;
10916       Bool isR   = opcode == BITS5(1,0,0,0,1);
10917       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10918       if (!ok || size == X11) return False;
10919       vassert(shift >= 1);
10920       IRTemp t1 = newTempV128();
10921       IRTemp t2 = newTempV128();
10922       IRTemp t3 = newTempV128();
10923       assign(t1, getQReg128(nn));
10924       assign(t2, isR ? binop(mkVecADD(size+1),
10925                              mkexpr(t1),
10926                              mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10927                      : mkexpr(t1));
10928       assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10929       IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10930       putLO64andZUorPutHI64(is2, dd, t4);
10931       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10932       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10933       DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10934           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10935       return True;
10936    }
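
   /* The rounding variant simply adds 1 << (shift-1) in the wide
      lanes before shifting; for example RSHRN Vd.8b, Vn.8h, #3 turns
      each 16-bit lane x into ((x + 4) >> 3) truncated to 8 bits
      (illustrative only). */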
10937 
10938    if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10939        || (bitU == 1
10940            && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10941       /* -------- 0,10010   SQSHRN{,2} #imm -------- */
10942       /* -------- 1,10010   UQSHRN{,2} #imm -------- */
10943       /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
10944       /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
10945       /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
10946       /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10947       UInt size  = 0;
10948       UInt shift = 0;
10949       Bool is2   = bitQ == 1;
10950       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10951       if (!ok || size == X11) return False;
10952       vassert(shift >= 1 && shift <= (8 << size));
10953       const HChar* nm = "??";
10954       IROp op = Iop_INVALID;
10955       /* Decide on the name and the operation. */
10956       /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10957          nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10958       }
10959       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10960          nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10961       }
10962       else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10963          nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10964       }
10965       else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10966          nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10967       }
10968       else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10969          nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10970       }
10971       else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10972          nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10973       }
10974       else vassert(0);
10975       /* Compute the result (Q, shifted value) pair. */
10976       IRTemp src128 = newTempV128();
10977       assign(src128, getQReg128(nn));
10978       IRTemp pair = newTempV128();
10979       assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10980       /* Update the result reg */
10981       IRTemp res64in128 = newTempV128();
10982       assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10983       putLO64andZUorPutHI64(is2, dd, res64in128);
10984       /* Update the Q flag. */
10985       IRTemp q64q64 = newTempV128();
10986       assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10987       IRTemp z128 = newTempV128();
10988       assign(z128, mkV128(0x0000));
10989       updateQCFLAGwithDifference(q64q64, z128);
10990       /* */
10991       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10992       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
10993       DIP("%s %s.%s, %s.%s, #%u\n", nm,
10994           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10995       return True;
10996    }
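
   /* Reference semantics sketch (illustrative only; helper name made
      up, 'qc' is an out parameter) for one lane of SQSHRUN, the
      signed-to-unsigned case, narrowing a 32-bit lane to 16 bits:

         unsigned short sqshrun_lane ( int x, unsigned sh, int* qc ) {
            long long v = (long long)x >> sh;   // arithmetic shift
            long long r = v < 0 ? 0 : (v > 0xFFFF ? 0xFFFF : v);
            *qc |= (r != v);                    // QC set iff clamped
            return (unsigned short)r;
         }

      The QAND ops used above deliver such a (result, saturation) pair
      in one V128: narrowed lanes in the low half and the saturation
      indication in the high half, hence the QC update compares the
      duplicated high half against zero. */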
10997 
10998    if (opcode == BITS5(1,0,1,0,0)) {
10999       /* -------- 0,10100 SSHLL{,2} #imm -------- */
11000       /* -------- 1,10100 USHLL{,2} #imm -------- */
11001       /* 31  28     22   18   15     9 4
11002          0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
11003          0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
11004          where Ta,Tb,sh
11005            = case immh of 1xxx -> invalid
11006                           01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
11007                           001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
11008                           0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
11009                           0000 -> AdvSIMD modified immediate (???)
11010       */
11011       Bool    isQ   = bitQ == 1;
11012       Bool    isU   = bitU == 1;
11013       UInt    immhb = (immh << 3) | immb;
11014       IRTemp  src   = newTempV128();
11015       IRTemp  zero  = newTempV128();
11016       IRExpr* res   = NULL;
11017       UInt    sh    = 0;
11018       const HChar* ta = "??";
11019       const HChar* tb = "??";
11020       assign(src, getQReg128(nn));
11021       assign(zero, mkV128(0x0000));
11022       if (immh & 8) {
11023          /* invalid; don't assign to res */
11024       }
11025       else if (immh & 4) {
11026          sh = immhb - 32;
11027          vassert(sh < 32); /* so 32-sh is 1..32 */
11028          ta = "2d";
11029          tb = isQ ? "4s" : "2s";
11030          IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11031                            : mk_InterleaveLO32x4(src, zero);
11032          res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11033       }
11034       else if (immh & 2) {
11035          sh = immhb - 16;
11036          vassert(sh < 16); /* so 16-sh is 1..16 */
11037          ta = "4s";
11038          tb = isQ ? "8h" : "4h";
11039          IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11040                            : mk_InterleaveLO16x8(src, zero);
11041          res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11042       }
11043       else if (immh & 1) {
11044          sh = immhb - 8;
11045          vassert(sh < 8); /* so 8-sh is 1..8 */
11046          ta = "8h";
11047          tb = isQ ? "16b" : "8b";
11048          IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11049                            : mk_InterleaveLO8x16(src, zero);
11050          res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11051       } else {
11052          vassert(immh == 0);
11053          /* invalid; don't assign to res */
11054       }
11055       /* */
11056       if (res) {
11057          putQReg128(dd, res);
11058          DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11059              isU ? 'u' : 's', isQ ? "2" : "",
11060              nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11061          return True;
11062       }
11063       return False;
11064    }
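
   /* Worked example for the interleave-then-shift trick (illustrative
      only): "sshll Vd.8h, Vn.8b, #2" on a source byte 0x80 first
      interleaves with zero, placing it in the top byte of its 16-bit
      lane (0x8000), then shifts arithmetically right by 8-2 = 6,
      giving 0xFE00 = -512 = (-128) << 2 sign-extended.  The unsigned
      form uses a logical shift instead, giving 0x0200 = 128 << 2. */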
11065 
11066    if (opcode == BITS5(1,1,1,0,0)) {
11067       /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11068       /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11069       /* If immh is of the form 00xx, the insn is invalid. */
11070       if (immh < BITS4(0,1,0,0)) return False;
11071       UInt size  = 0;
11072       UInt fbits = 0;
11073       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11074       /* The following holds because immh is never zero. */
11075       vassert(ok);
11076       /* The following holds because immh >= 0100. */
11077       vassert(size == X10 || size == X11);
11078       Bool isD = size == X11;
11079       Bool isU = bitU == 1;
11080       Bool isQ = bitQ == 1;
11081       if (isD && !isQ) return False; /* reject .1d case */
11082       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11083       Double  scale  = two_to_the_minus(fbits);
11084       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11085                            : IRExpr_Const(IRConst_F32( (Float)scale ));
11086       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
11087       IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11088                            : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11089       IRType tyF = isD ? Ity_F64 : Ity_F32;
11090       IRType tyI = isD ? Ity_I64 : Ity_I32;
11091       UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11092       vassert(nLanes == 2 || nLanes == 4);
11093       for (UInt i = 0; i < nLanes; i++) {
11094          IRTemp src = newTemp(tyI);
11095          IRTemp res = newTemp(tyF);
11096          IRTemp rm  = mk_get_IR_rounding_mode();
11097          assign(src, getQRegLane(nn, i, tyI));
11098          assign(res, triop(opMUL, mkexpr(rm),
11099                                   binop(opCVT, mkexpr(rm), mkexpr(src)),
11100                                   scaleE));
11101          putQRegLane(dd, i, mkexpr(res));
11102       }
11103       if (!isQ) {
11104          putQRegLane(dd, 1, mkU64(0));
11105       }
11106       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11107       DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11108           nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11109       return True;
11110    }
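
   /* The #fbits immediate requests a fixed-point conversion
      (illustrative example only): with fbits = 8, an integer lane
      holding 0x180 (384) converts to 384 * 2^-8 = 1.5.  Both the
      int-to-FP convert and the scaling multiply use the current IR
      rounding mode. */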
11111 
11112    if (opcode == BITS5(1,1,1,1,1)) {
11113       /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11114       /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11115       /* If immh is of the form 00xx, the insn is invalid. */
11116       if (immh < BITS4(0,1,0,0)) return False;
11117       UInt size  = 0;
11118       UInt fbits = 0;
11119       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11120       /* The following holds because immh is never zero. */
11121       vassert(ok);
11122       /* The following holds because immh >= 0100. */
11123       vassert(size == X10 || size == X11);
11124       Bool isD = size == X11;
11125       Bool isU = bitU == 1;
11126       Bool isQ = bitQ == 1;
11127       if (isD && !isQ) return False; /* reject .1d case */
11128       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11129       Double  scale  = two_to_the_plus(fbits);
11130       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11131                            : IRExpr_Const(IRConst_F32( (Float)scale ));
11132       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
11133       IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11134                            : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11135       IRType tyF = isD ? Ity_F64 : Ity_F32;
11136       IRType tyI = isD ? Ity_I64 : Ity_I32;
11137       UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11138       vassert(nLanes == 2 || nLanes == 4);
11139       for (UInt i = 0; i < nLanes; i++) {
11140          IRTemp src = newTemp(tyF);
11141          IRTemp res = newTemp(tyI);
11142          IRTemp rm  = newTemp(Ity_I32);
11143          assign(src, getQRegLane(nn, i, tyF));
11144          assign(rm,  mkU32(Irrm_ZERO));
11145          assign(res, binop(opCVT, mkexpr(rm),
11146                                   triop(opMUL, mkexpr(rm),
11147                                                mkexpr(src), scaleE)));
11148          putQRegLane(dd, i, mkexpr(res));
11149       }
11150       if (!isQ) {
11151          putQRegLane(dd, 1, mkU64(0));
11152       }
11153       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11154       DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11155           nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11156       return True;
11157    }
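
   /* This is the inverse fixed-point conversion (illustrative example
      only): with fbits = 8, an FP lane holding 1.5 is scaled to 384.0
      and truncated towards zero (Irrm_ZERO), yielding 0x180.  Note
      that the same round-to-zero mode is also applied to the scaling
      multiply. */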
11158 
11160    return False;
11161 #  undef INSN
11162 }
11163 
11164 
11165 static
11166 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11167 {
11168    /* 31 30 29 28    23   21 20 15     11 9 4
11169       0  Q  U  01110 size 1  m  opcode 00 n d
11170       Decode fields: u,opcode
11171    */
11172 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
11173    if (INSN(31,31) != 0
11174        || INSN(28,24) != BITS5(0,1,1,1,0)
11175        || INSN(21,21) != 1
11176        || INSN(11,10) != BITS2(0,0)) {
11177       return False;
11178    }
11179    UInt bitQ   = INSN(30,30);
11180    UInt bitU   = INSN(29,29);
11181    UInt size   = INSN(23,22);
11182    UInt mm     = INSN(20,16);
11183    UInt opcode = INSN(15,12);
11184    UInt nn     = INSN(9,5);
11185    UInt dd     = INSN(4,0);
11186    vassert(size < 4);
11187    Bool is2    = bitQ == 1;
11188 
11189    if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11190       /* -------- 0,0000 SADDL{2} -------- */
11191       /* -------- 1,0000 UADDL{2} -------- */
11192       /* -------- 0,0010 SSUBL{2} -------- */
11193       /* -------- 1,0010 USUBL{2} -------- */
11194       /* Widens, and size refers to the narrow lanes. */
11195       if (size == X11) return False;
11196       vassert(size <= 2);
11197       Bool   isU   = bitU == 1;
11198       Bool   isADD = opcode == BITS4(0,0,0,0);
11199       IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11200       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11201       IRTemp res   = newTempV128();
11202       assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11203                         mkexpr(argL), mkexpr(argR)));
11204       putQReg128(dd, mkexpr(res));
11205       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11206       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11207       const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
11208                                      : (isU ? "usubl" : "ssubl");
11209       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11210           nameQReg128(dd), arrWide,
11211           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11212       return True;
11213    }
11214 
11215    if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11216       /* -------- 0,0001 SADDW{2} -------- */
11217       /* -------- 1,0001 UADDW{2} -------- */
11218       /* -------- 0,0011 SSUBW{2} -------- */
11219       /* -------- 1,0011 USUBW{2} -------- */
11220       /* Widens, and size refers to the narrow lanes. */
11221       if (size == X11) return False;
11222       vassert(size <= 2);
11223       Bool   isU   = bitU == 1;
11224       Bool   isADD = opcode == BITS4(0,0,0,1);
11225       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11226       IRTemp res   = newTempV128();
11227       assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11228                         getQReg128(nn), mkexpr(argR)));
11229       putQReg128(dd, mkexpr(res));
11230       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11231       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11232       const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
11233                                      : (isU ? "usubw" : "ssubw");
11234       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11235           nameQReg128(dd), arrWide,
11236           nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11237       return True;
11238    }
11239 
11240    if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11241       /* -------- 0,0100  ADDHN{2} -------- */
11242       /* -------- 1,0100 RADDHN{2} -------- */
11243       /* -------- 0,0110  SUBHN{2} -------- */
11244       /* -------- 1,0110 RSUBHN{2} -------- */
11245       /* Narrows, and size refers to the narrowed lanes. */
11246       if (size == X11) return False;
11247       vassert(size <= 2);
11248       const UInt shift[3] = { 8, 16, 32 };
11249       Bool isADD = opcode == BITS4(0,1,0,0);
11250       Bool isR   = bitU == 1;
11251       /* Combined elements in wide lanes */
11252       IRTemp  wide  = newTempV128();
11253       IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11254                             getQReg128(nn), getQReg128(mm));
11255       if (isR) {
11256          wideE = binop(mkVecADD(size+1),
11257                        wideE,
11258                        mkexpr(math_VEC_DUP_IMM(size+1,
11259                                                1ULL << (shift[size]-1))));
11260       }
11261       assign(wide, wideE);
11262       /* Top halves of elements, still in wide lanes */
11263       IRTemp shrd = newTempV128();
11264       assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11265       /* Elements now compacted into lower 64 bits */
11266       IRTemp new64 = newTempV128();
11267       assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11268       putLO64andZUorPutHI64(is2, dd, new64);
11269       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11270       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11271       const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11272                               : (isR ? "rsubhn" : "subhn");
11273       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11274           nameQReg128(dd), arrNarrow,
11275           nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11276       return True;
11277    }
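
   /* Reference semantics sketch (illustrative only; helper name made
      up) for one lane of RADDHN narrowing 8h sources to 8b, so
      shift[size] = 8:

         unsigned char raddhn_lane ( unsigned short a, unsigned short b ) {
            unsigned sum = (unsigned)a + b + 0x80;  // rounding constant 1<<7
            return (unsigned char)(sum >> 8);       // keep the high half
         }

      ADDHN is the same without the rounding constant, and the SUBHN
      forms subtract instead of add. */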
11278 
11279    if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11280       /* -------- 0,0101 SABAL{2} -------- */
11281       /* -------- 1,0101 UABAL{2} -------- */
11282       /* -------- 0,0111 SABDL{2} -------- */
11283       /* -------- 1,0111 UABDL{2} -------- */
11284       /* Widens, and size refers to the narrow lanes. */
11285       if (size == X11) return False;
11286       vassert(size <= 2);
11287       Bool   isU   = bitU == 1;
11288       Bool   isACC = opcode == BITS4(0,1,0,1);
11289       IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11290       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11291       IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11292       IRTemp res   = newTempV128();
11293       assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11294                         : mkexpr(abd));
11295       putQReg128(dd, mkexpr(res));
11296       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11297       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11298       const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
11299                                      : (isU ? "uabdl" : "sabdl");
11300       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11301           nameQReg128(dd), arrWide,
11302           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11303       return True;
11304    }
11305 
11306    if (opcode == BITS4(1,1,0,0)
11307        || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11308       /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
11309       /* -------- 1,1100  UMULL{2} -------- */ // 0
11310       /* -------- 0,1000  SMLAL{2} -------- */ // 1
11311       /* -------- 1,1000  UMLAL{2} -------- */ // 1
11312       /* -------- 0,1010  SMLSL{2} -------- */ // 2
11313       /* -------- 1,1010  UMLSL{2} -------- */ // 2
11314       /* Widens, and size refers to the narrow lanes. */
11315       UInt ks = 3;
11316       switch (opcode) {
11317          case BITS4(1,1,0,0): ks = 0; break;
11318          case BITS4(1,0,0,0): ks = 1; break;
11319          case BITS4(1,0,1,0): ks = 2; break;
11320          default: vassert(0);
11321       }
11322       vassert(ks >= 0 && ks <= 2);
11323       if (size == X11) return False;
11324       vassert(size <= 2);
11325       Bool   isU  = bitU == 1;
11326       IRTemp vecN = newTempV128();
11327       IRTemp vecM = newTempV128();
11328       IRTemp vecD = newTempV128();
11329       assign(vecN, getQReg128(nn));
11330       assign(vecM, getQReg128(mm));
11331       assign(vecD, getQReg128(dd));
11332       IRTemp res = IRTemp_INVALID;
11333       math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11334                     vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11335       putQReg128(dd, mkexpr(res));
11336       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11337       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11338       const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11339       DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11340           nameQReg128(dd), arrWide,
11341           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11342       return True;
11343    }
11344 
11345    if (bitU == 0
11346        && (opcode == BITS4(1,1,0,1)
11347            || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11348       /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
11349       /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
11350       /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
11351       /* Widens, and size refers to the narrow lanes. */
11352       UInt ks = 3;
11353       switch (opcode) {
11354          case BITS4(1,1,0,1): ks = 0; break;
11355          case BITS4(1,0,0,1): ks = 1; break;
11356          case BITS4(1,0,1,1): ks = 2; break;
11357          default: vassert(0);
11358       }
11359       vassert(ks >= 0 && ks <= 2);
11360       if (size == X00 || size == X11) return False;
11361       vassert(size <= 2);
11362       IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11363       vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11364       newTempsV128_3(&vecN, &vecM, &vecD);
11365       assign(vecN, getQReg128(nn));
11366       assign(vecM, getQReg128(mm));
11367       assign(vecD, getQReg128(dd));
11368       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11369                        is2, size, "mas"[ks],
11370                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11371       putQReg128(dd, mkexpr(res));
11372       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11373       updateQCFLAGwithDifference(sat1q, sat1n);
11374       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11375          updateQCFLAGwithDifference(sat2q, sat2n);
11376       }
11377       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11378       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
11379       const HChar* nm        = ks == 0 ? "sqdmull"
11380                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11381       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11382           nameQReg128(dd), arrWide,
11383           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11384       return True;
11385    }
11386 
11387    if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11388       /* -------- 0,1110  PMULL{2} -------- */
11389       /* Widens, and size refers to the narrow lanes. */
11390       if (size != X00 && size != X11) return False;
11391       IRTemp  res  = IRTemp_INVALID;
11392       IRExpr* srcN = getQReg128(nn);
11393       IRExpr* srcM = getQReg128(mm);
11394       const HChar* arrNarrow = NULL;
11395       const HChar* arrWide   = NULL;
11396       if (size == X00) {
11397          res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11398                                          srcN, srcM);
11399          arrNarrow = nameArr_Q_SZ(bitQ, size);
11400          arrWide   = nameArr_Q_SZ(1,    size+1);
11401       } else {
11402          /* The same thing as the X00 case, except we have to call
11403             a helper to do it. */
11404          vassert(size == X11);
11405          res = newTemp(Ity_V128);
11406          IROp slice
11407             = is2 ? Iop_V128HIto64 : Iop_V128to64;
11408          IRExpr** args
11409             = mkIRExprVec_3( IRExpr_VECRET(),
11410                              unop(slice, srcN), unop(slice, srcM));
11411          IRDirty* di
11412             = unsafeIRDirty_1_N( res, 0/*regparms*/,
11413                                       "arm64g_dirtyhelper_PMULLQ",
11414                                       &arm64g_dirtyhelper_PMULLQ, args);
11415          stmt(IRStmt_Dirty(di));
11416          /* We can't use nameArr_Q_SZ for this because it can't deal with
11417             Q-sized (128 bit) results.  Hence do it by hand. */
11418          arrNarrow = bitQ == 0 ? "1d" : "2d";
11419          arrWide   = "1q";
11420       }
11421       putQReg128(dd, mkexpr(res));
11422       DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11423           nameQReg128(dd), arrWide,
11424           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11425       return True;
11426    }
11427 
11428    return False;
11429 #  undef INSN
11430 }
11431 
11432 
11433 static
11434 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11435 {
11436    /* 31 30 29 28    23   21 20 15     10 9 4
11437       0  Q  U  01110 size 1  m  opcode 1  n d
11438       Decode fields: u,size,opcode
11439    */
11440 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
11441    if (INSN(31,31) != 0
11442        || INSN(28,24) != BITS5(0,1,1,1,0)
11443        || INSN(21,21) != 1
11444        || INSN(10,10) != 1) {
11445       return False;
11446    }
11447    UInt bitQ   = INSN(30,30);
11448    UInt bitU   = INSN(29,29);
11449    UInt size   = INSN(23,22);
11450    UInt mm     = INSN(20,16);
11451    UInt opcode = INSN(15,11);
11452    UInt nn     = INSN(9,5);
11453    UInt dd     = INSN(4,0);
11454    vassert(size < 4);
11455 
11456    if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11457       /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11458       /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11459       /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11460       /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11461       if (size == X11) return False;
11462       Bool isADD = opcode == BITS5(0,0,0,0,0);
11463       Bool isU   = bitU == 1;
11464       /* Widen both args out, do the math, narrow to final result. */
11465       IRTemp argL   = newTempV128();
11466       IRTemp argLhi = IRTemp_INVALID;
11467       IRTemp argLlo = IRTemp_INVALID;
11468       IRTemp argR   = newTempV128();
11469       IRTemp argRhi = IRTemp_INVALID;
11470       IRTemp argRlo = IRTemp_INVALID;
11471       IRTemp resHi  = newTempV128();
11472       IRTemp resLo  = newTempV128();
11473       IRTemp res    = IRTemp_INVALID;
11474       assign(argL, getQReg128(nn));
11475       argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11476       argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
11477       assign(argR, getQReg128(mm));
11478       argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11479       argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
11480       IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11481       IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11482       assign(resHi, binop(opSxR,
11483                           binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11484                           mkU8(1)));
11485       assign(resLo, binop(opSxR,
11486                           binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11487                           mkU8(1)));
11488       res = math_NARROW_LANES ( resHi, resLo, size );
11489       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11490       const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
11491                                : (isU ? "uhsub" : "shsub");
11492       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11493       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11494           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11495       return True;
11496    }
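
   /* Reference semantics sketch (illustrative only; helper name made
      up) for one lane of SHADD on signed bytes; widening first means
      the intermediate sum cannot wrap:

         signed char shadd_lane ( signed char a, signed char b ) {
            return (signed char)(((int)a + (int)b) >> 1);  // floor((a+b)/2)
         }

      UHADD is the same with unsigned widening, and the halving
      subtracts compute ((int)a - (int)b) >> 1 instead. */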
11497 
11498    if (opcode == BITS5(0,0,0,1,0)) {
11499       /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11500       /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11501       if (bitQ == 0 && size == X11) return False; // implied 1d case
11502       Bool   isU  = bitU == 1;
11503       IRTemp argL = newTempV128();
11504       IRTemp argR = newTempV128();
11505       assign(argL, getQReg128(nn));
11506       assign(argR, getQReg128(mm));
11507       IRTemp res = math_RHADD(size, isU, argL, argR);
11508       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11509       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11510       DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11511           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11512       return True;
11513    }
11514 
11515    if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11516       /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11517       /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11518       /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11519       /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11520       if (bitQ == 0 && size == X11) return False; // implied 1d case
11521       Bool isADD = opcode == BITS5(0,0,0,0,1);
11522       Bool isU   = bitU == 1;
11523       IROp qop   = Iop_INVALID;
11524       IROp nop   = Iop_INVALID;
11525       if (isADD) {
11526          qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11527          nop = mkVecADD(size);
11528       } else {
11529          qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11530          nop = mkVecSUB(size);
11531       }
11532       IRTemp argL = newTempV128();
11533       IRTemp argR = newTempV128();
11534       IRTemp qres = newTempV128();
11535       IRTemp nres = newTempV128();
11536       assign(argL, getQReg128(nn));
11537       assign(argR, getQReg128(mm));
11538       assign(qres, math_MAYBE_ZERO_HI64_fromE(
11539                       bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11540       assign(nres, math_MAYBE_ZERO_HI64_fromE(
11541                       bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11542       putQReg128(dd, mkexpr(qres));
11543       updateQCFLAGwithDifference(qres, nres);
11544       const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
11545                                : (isU ? "uqsub" : "sqsub");
11546       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11547       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11548           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11549       return True;
11550    }
11551 
11552    if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11553       /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11554       /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11555       /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11556       /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11557       Bool   isORx  = (size & 2) == 2;
11558       Bool   invert = (size & 1) == 1;
11559       IRTemp res    = newTempV128();
11560       assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11561                         getQReg128(nn),
11562                         invert ? unop(Iop_NotV128, getQReg128(mm))
11563                                : getQReg128(mm)));
11564       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11565       const HChar* names[4] = { "and", "bic", "orr", "orn" };
11566       const HChar* ar = bitQ == 1 ? "16b" : "8b";
11567       DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11568           nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11569       return True;
11570    }
11571 
11572    if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11573       /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11574       /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11575       /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11576       /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11577       IRTemp argD = newTempV128();
11578       IRTemp argN = newTempV128();
11579       IRTemp argM = newTempV128();
11580       assign(argD, getQReg128(dd));
11581       assign(argN, getQReg128(nn));
11582       assign(argM, getQReg128(mm));
11583       const IROp opXOR = Iop_XorV128;
11584       const IROp opAND = Iop_AndV128;
11585       const IROp opNOT = Iop_NotV128;
11586       IRTemp res = newTempV128();
11587       switch (size) {
11588          case BITS2(0,0): /* EOR */
11589             assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11590             break;
11591          case BITS2(0,1): /* BSL */
11592             assign(res, binop(opXOR, mkexpr(argM),
11593                               binop(opAND,
11594                                     binop(opXOR, mkexpr(argM), mkexpr(argN)),
11595                                           mkexpr(argD))));
11596             break;
11597          case BITS2(1,0): /* BIT */
11598             assign(res, binop(opXOR, mkexpr(argD),
11599                               binop(opAND,
11600                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
11601                                     mkexpr(argM))));
11602             break;
11603          case BITS2(1,1): /* BIF */
11604             assign(res, binop(opXOR, mkexpr(argD),
11605                               binop(opAND,
11606                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
11607                                     unop(opNOT, mkexpr(argM)))));
11608             break;
11609          default:
11610             vassert(0);
11611       }
11612       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11613       const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11614       const HChar* arr = bitQ == 1 ? "16b" : "8b";
11615       DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11616           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11617       return True;
11618    }
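
   /* The three selects above (BSL/BIT/BIF) are the standard xor-and
      bitwise mux (illustrative identity only): per bit,
      m ^ ((m ^ n) & sel) equals (sel & n) | (~sel & m), i.e. the
      selector picks n where it is 1 and m where it is 0.  BSL uses
      the old destination as the selector, BIT uses argM as the
      selector over (argD, argN), and BIF is the same with the
      selector inverted. */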
11619 
11620    if (opcode == BITS5(0,0,1,1,0)) {
11621       /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11622       /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11623       if (bitQ == 0 && size == X11) return False; // implied 1d case
11624       Bool   isGT  = bitU == 0;
11625       IRExpr* argL = getQReg128(nn);
11626       IRExpr* argR = getQReg128(mm);
11627       IRTemp  res  = newTempV128();
11628       assign(res,
11629              isGT ? binop(mkVecCMPGTS(size), argL, argR)
11630                   : binop(mkVecCMPGTU(size), argL, argR));
11631       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11632       const HChar* nm  = isGT ? "cmgt" : "cmhi";
11633       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11634       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11635           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11636       return True;
11637    }
11638 
11639    if (opcode == BITS5(0,0,1,1,1)) {
11640       /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11641       /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11642       if (bitQ == 0 && size == X11) return False; // implied 1d case
11643       Bool    isGE = bitU == 0;
11644       IRExpr* argL = getQReg128(nn);
11645       IRExpr* argR = getQReg128(mm);
11646       IRTemp  res  = newTempV128();
11647       assign(res,
11648              isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11649                   : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11650       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11651       const HChar* nm  = isGE ? "cmge" : "cmhs";
11652       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11653       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11654           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11655       return True;
11656    }
11657 
11658    if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11659       /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
11660       /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11661       /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
11662       /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11663       if (bitQ == 0 && size == X11) return False; // implied 1d case
11664       Bool isU = bitU == 1;
11665       Bool isR = opcode == BITS5(0,1,0,1,0);
11666       IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11667                      : (isU ? mkVecSHU(size)  : mkVecSHS(size));
11668       IRTemp res = newTempV128();
11669       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11670       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11671       const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
11672                              : (isU ? "ushl"  : "sshl");
11673       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11674       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11675           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11676       return True;
11677    }
11678 
11679    if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11680       /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
11681       /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11682       /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
11683       /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11684       if (bitQ == 0 && size == X11) return False; // implied 1d case
11685       Bool isU = bitU == 1;
11686       Bool isR = opcode == BITS5(0,1,0,1,1);
11687       IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11688                      : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
11689       /* This is a bit tricky.  If we're only interested in the lowest 64 bits
11690          of the result (viz, bitQ == 0), then we must adjust the operands to
11691          ensure that the upper part of the result, that we don't care about,
11692          doesn't pollute the returned Q value.  To do this, zero out the upper
11693          operand halves beforehand.  This works because it means, for the
11694          lanes we don't care about, we are shifting zero by zero, which can
11695          never saturate. */
11696       IRTemp res256 = newTemp(Ity_V256);
11697       IRTemp resSH  = newTempV128();
11698       IRTemp resQ   = newTempV128();
11699       IRTemp zero   = newTempV128();
11700       assign(res256, binop(op,
11701                            math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11702                            math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11703       assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11704       assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
11705       assign(zero,  mkV128(0x0000));
11706       putQReg128(dd, mkexpr(resSH));
11707       updateQCFLAGwithDifference(resQ, zero);
11708       const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
11709                              : (isU ? "uqshl"  : "sqshl");
11710       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11711       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11712           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11713       return True;
11714    }
11715 
11716    if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11717       /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11718       /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11719       /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11720       /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11721       if (bitQ == 0 && size == X11) return False; // implied 1d case
11722       Bool isU   = bitU == 1;
11723       Bool isMAX = (opcode & 1) == 0;
11724       IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11725                          : (isU ? mkVecMINU(size) : mkVecMINS(size));
11726       IRTemp t   = newTempV128();
11727       assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11728       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11729       const HChar* nm = isMAX ? (isU ? "umax" : "smax")
11730                               : (isU ? "umin" : "smin");
11731       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11732       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11733           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11734       return True;
11735    }
11736 
11737    if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
11738       /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
11739       /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
11740       /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
11741       /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
11742       if (size == X11) return False; // 1d/2d cases not allowed
11743       Bool isU   = bitU == 1;
11744       Bool isACC = opcode == BITS5(0,1,1,1,1);
11745       vassert(size <= 2);
11746       IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
11747       IRTemp t2 = newTempV128();
11748       assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
11749                        : mkexpr(t1));
11750       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11751       const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
11752                                : (isU ? "uabd" : "sabd");
11753       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11754       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11755           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11756       return True;
11757    }
11758 
11759    if (opcode == BITS5(1,0,0,0,0)) {
11760       /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
11761       /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
11762       if (bitQ == 0 && size == X11) return False; // implied 1d case
11763       Bool   isSUB = bitU == 1;
11764       IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
11765       IRTemp t     = newTempV128();
11766       assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11767       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11768       const HChar* nm  = isSUB ? "sub" : "add";
11769       const HChar* arr = nameArr_Q_SZ(bitQ, size);
11770       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11771           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11772       return True;
11773    }
11774 
11775    if (opcode == BITS5(1,0,0,0,1)) {
11776       /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
11777       /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
11778       if (bitQ == 0 && size == X11) return False; // implied 1d case
11779       Bool    isEQ = bitU == 1;
11780       IRExpr* argL = getQReg128(nn);
11781       IRExpr* argR = getQReg128(mm);
11782       IRTemp  res  = newTempV128();
11783       assign(res,
11784              isEQ ? binop(mkVecCMPEQ(size), argL, argR)
11785                   : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
11786                                             binop(Iop_AndV128, argL, argR),
11787                                             mkV128(0x0000))));
11788       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp   opMUL    = mkVecMUL(size);
      IROp   opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res      = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
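
   /* How the pairwise trick above works (ADDP below uses it too):
      with 4s laneage, CatEvenLanes(m,n) gives [m2,m0,n2,n0] and
      CatOddLanes(m,n) gives [m3,m1,n3,n1], so a single lane-wise op
      on the two yields every pairwise result at once: m-pairs in the
      upper half and n-pairs in the lower half, as the architecture
      requires.  When Q=0 the valid results sit in lanes 2 and 0, and
      the CatEvenLanes32x4(res,res) step moves them down to lanes 1
      and 0 before the upper 64 bits are zeroed. */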

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
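
   /* For reference: per lane of width e bits, sqdmulh computes
      sat((2*n*m) >> e) and sqrdmulh computes
      sat((2*n*m + (1 << (e-1))) >> e).  The only inputs that can
      saturate are n == m == -2^(e-1), for which the result is
      2^(e-1)-1 with QC set; the sat1q/sat1n comparison above is what
      detects this. */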

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res   = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
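
   /* The double-rounding FIXME above could plausibly be addressed by
      working one lane at a time with the fused multiply-add IROps.
      A sketch (untested) for the 2d case, using Iop_MAddF64, which
      fuses n*m + d into one correctly-rounded operation:

         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i,
                        qop(Iop_MAddF64, mkexpr(rm),
                            getQRegLane(nn, i, Ity_F64),
                            getQRegLane(mm, i, Ity_F64),
                            getQRegLane(dd, i, Ity_F64)));
         }

      FMLS would first negate one multiplicand (unop(Iop_NegF64, ..));
      the 2s/4s cases would use Iop_MAddF32 per lane and zero the
      upper half when bitQ == 0. */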

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
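
   /* Re the FMULX kludge above: the architected difference is narrow
      but real.  FMULX returns 2.0 (with the appropriate sign) for the
      special cases (+/-0) * (+/-Inf), where FMUL produces the default
      NaN; for all other inputs the two agree.  The same kludge
      appears again in the by-element case later on. */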

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1    = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
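
   /* The "swapd" trick here and in the following two cases exists
      because the IR provides only EQ/LT/LE (and UN) vector FP
      comparisons: a >= b is therefore computed as b <= a, and
      a > b as b < a. */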

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
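
   /* math_REARRANGE_FOR_FLOATING_PAIRWISE presumably plays the same
      even/odd-lanes game as the integer pairwise cases earlier in
      this function, splitting the lane pairs across preL and preR so
      that a single vector op computes all the pairwise results at
      once. */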

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
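
   /* These are the Newton-Raphson step primops: architecturally
      FRECPS returns 2.0 - n*m and FRSQRTS returns (3.0 - n*m) / 2.0.
      Together with the FRECPE/FRSQRTE estimates they let software
      iterate towards 1/x and 1/sqrt(x) respectively. */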

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src   = newTempV128();
      IRTemp sum   = newTempV128();
      IRTemp res   = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }
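
   /* Worked example of the above: for saddlp Vd.4h, Vn.8b, each
      16-bit result lane is d[i] = sxt(n[2*i]) + sxt(n[2*i+1]); the
      two math_WIDEN_EVEN_OR_ODD_LANES calls supply the widened odd
      and even pair members respectively, and a single double-width
      add combines them. */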

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isNEG  = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
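
   /* The only value either op can saturate on is the most negative
      integer in the lane: sqabs and sqneg of -2^(e-1) both give
      2^(e-1)-1 rather than wrapping back to -2^(e-1), and set QC.
      Comparing the saturating (qres) and wrapping (nres) results is
      what triggers that QC update. */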

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isD     = size == X11;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isFNEG = bitU == 1;
      IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                             : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
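
   /* putLO64andZUorPutHI64 implements the two architected write-backs:
      xtn (is2 == False) writes the narrowed 64 bits to the low half
      of Vd and zeroes the upper half, while xtn2 writes them to the
      upper half and leaves the lower half untouched. */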

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
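
   /* QC detection for the saturating narrows: re-widen the narrowed
      result (zWiden choosing zero- vs sign-extension to match the
      variant) and compare against the original source.  Any lane that
      does not survive the round trip must have saturated, and
      updateQCFLAGwithDifference records that in QCFLAG. */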

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }
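
   /* Why interleave-with-self works here: interleaving src with
      itself makes each wide lane hold (b << e) | b, where b is the
      narrow lane value and e its width; the following left shift by e
      then leaves exactly b << e, the architected shll result.  E.g.
      in the 8b->8h case, ((b << 8) | b) << 8 == b << 8 when truncated
      to 16 bits. */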

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                         binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* Using Irrm_NEAREST here isn't right.  The docs say "round to
         odd" but I don't know what that really means. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                         binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
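
   /* "Round to odd" (von Neumann rounding) means: if the conversion
      is inexact, force the least significant bit of the result's
      significand to 1.  That makes a later narrowing of the F32
      result immune to double rounding.  Hence the Irrm_NEAREST kludge
      above can be off by one ulp in the final result. */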

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   ix = 0;
   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
   if (ix > 0) {
      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
      /* rm plan:
         FRINTN: tieeven -- !! FIXME KLUDGED !!
         FRINTM: -inf
         FRINTP: +inf
         FRINTZ: zero
         FRINTA: tieaway -- !! FIXME KLUDGED !!
         FRINTX: per FPCR + "exact = TRUE"
         FRINTI: per FPCR
      */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRTemp irrmRM = mk_get_IR_rounding_mode();

      UChar ch = '?';
      IRTemp irrm = newTemp(Ity_I32);
      switch (ix) {
         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // I am unsure about the following, due to the "integral exact"
         // description in the manual.  What does it mean? (frintx, that is)
         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
         default: vassert(0);
      }

      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("frint%c %s.%s, %s.%s\n", ch,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
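
   /* On the tie-handling kludges above: Irrm_NEAREST resolves ties to
      even, which is exactly what frintn requires, so the frintn FIXME
      may be overly pessimistic.  frinta, however, needs
      ties-away-from-zero (Irrm_NEAREST_TIE_AWAY_0): on an exact tie
      such as 2.5 this code yields 2.0 where the architecture requires
      3.0. */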

   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRRoundingMode irrm = 8; /*impossible*/
      HChar          ch   = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                            getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                            getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
      Bool isREC = bitU == 0;
      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28      22 21       15     9 4
         0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                       isQ, isF64 );
         IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm  = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isD = (size & 1) == 1;
      IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
                             getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", "fsqrt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21 20 19 15     11   9 4
      0 Q U 01111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          isD ? 'd' : 's', index);
      return True;
   }

   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRTemp res  = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
          isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm    = 32; // invalid
      UInt ix    = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }
12979 
12980    if (opcode == BITS4(1,0,1,0)
12981        || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12982       /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12983       /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12984       /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12985       /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12986       /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12987       /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2
12988       /* Widens, and size refers to the narrowed lanes. */
12989       UInt ks = 3;
12990       switch (opcode) {
12991          case BITS4(1,0,1,0): ks = 0; break;
12992          case BITS4(0,0,1,0): ks = 1; break;
12993          case BITS4(0,1,1,0): ks = 2; break;
12994          default: vassert(0);
12995       }
12996       vassert(ks >= 0 && ks <= 2);
12997       Bool isU = bitU == 1;
12998       Bool is2 = bitQ == 1;
12999       UInt mm  = 32; // invalid
13000       UInt ix  = 16; // invalid
13001       switch (size) {
13002          case X00:
13003             return False; // h_b_b[] case is not allowed
13004          case X01:
13005             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13006          case X10:
13007             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13008          case X11:
13009             return False; // q_d_d[] case is not allowed
13010          default:
13011             vassert(0);
13012       }
13013       vassert(mm < 32 && ix < 16);
13014       IRTemp vecN  = newTempV128();
13015       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13016       IRTemp vecD  = newTempV128();
13017       assign(vecN, getQReg128(nn));
13018       assign(vecD, getQReg128(dd));
13019       IRTemp res = IRTemp_INVALID;
13020       math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13021                     vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13022       putQReg128(dd, mkexpr(res));
13023       const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13024       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13025       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
13026       HChar ch               = size == X01 ? 'h' : 's';
13027       DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13028           isU ? 'u' : 's', nm, is2 ? "2" : "",
13029           nameQReg128(dd), arrWide,
13030           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13031       return True;
13032    }
13033 
13034    if (bitU == 0
13035        && (opcode == BITS4(1,0,1,1)
13036            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13037       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13038       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13039       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13040       /* Widens, and size refers to the narrowed lanes. */
13041       UInt ks = 3;
13042       switch (opcode) {
13043          case BITS4(1,0,1,1): ks = 0; break;
13044          case BITS4(0,0,1,1): ks = 1; break;
13045          case BITS4(0,1,1,1): ks = 2; break;
13046          default: vassert(0);
13047       }
13048       vassert(ks >= 0 && ks <= 2);
13049       Bool is2 = bitQ == 1;
13050       UInt mm  = 32; // invalid
13051       UInt ix  = 16; // invalid
13052       switch (size) {
13053          case X00:
13054             return False; // h_b_b[] case is not allowed
13055          case X01:
13056             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13057          case X10:
13058             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13059          case X11:
13060             return False; // q_d_d[] case is not allowed
13061          default:
13062             vassert(0);
13063       }
13064       vassert(mm < 32 && ix < 16);
13065       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13066       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13067       newTempsV128_2(&vecN, &vecD);
13068       assign(vecN, getQReg128(nn));
13069       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13070       assign(vecD, getQReg128(dd));
13071       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13072                        is2, size, "mas"[ks],
13073                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13074       putQReg128(dd, mkexpr(res));
13075       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13076       updateQCFLAGwithDifference(sat1q, sat1n);
13077       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13078          updateQCFLAGwithDifference(sat2q, sat2n);
13079       }
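      /* The sat*q/sat*n pairs above carry the saturation bookkeeping:
         each stage yields a saturated and an unsaturated result, and
         FPSR.QC is set if they differ.  E.g. sqdmull with both 16-bit
         operands equal to 0x8000 wants 2*(-32768*-32768) = 2^31, which
         overflows I32; the result saturates to 0x7FFFFFFF and QC gets
         set. */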
13080       const HChar* nm        = ks == 0 ? "sqdmull"
13081                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13082       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13083       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
13084       HChar ch               = size == X01 ? 'h' : 's';
13085       DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13086           nm, is2 ? "2" : "",
13087           nameQReg128(dd), arrWide,
13088           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
13089       return True;
13090    }
13091 
13092    if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
13093       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13094       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13095       UInt mm  = 32; // invalid
13096       UInt ix  = 16; // invalid
13097       switch (size) {
13098          case X00:
13099             return False; // b case is not allowed
13100          case X01:
13101             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13102          case X10:
13103             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13104          case X11:
13105             return False; // q case is not allowed
13106          default:
13107             vassert(0);
13108       }
13109       vassert(mm < 32 && ix < 16);
13110       Bool isR = opcode == BITS4(1,1,0,1);
13111       IRTemp res, sat1q, sat1n, vN, vM;
13112       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13113       vN = newTempV128();
13114       assign(vN, getQReg128(nn));
13115       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13116       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13117       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13118       IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13119       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13120       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
13121       const HChar* arr = nameArr_Q_SZ(bitQ, size);
13122       HChar ch         = size == X01 ? 'h' : 's';
13123       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13124           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
13125       return True;
13126    }
13127 
13128    return False;
13129 #  undef INSN
13130 }
13131 
13132 
13133 static
13134 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13135 {
13136    /* 31        23   21    16     11 9 4
13137       0100 1110 size 10100 opcode 10 n d
13138       Decode fields are: size,opcode
13139       Size is always 00 in ARMv8, it appears.
13140    */
13141 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13142    if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13143       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13144       return False;
13145    }
13146    UInt size   = INSN(23,22);
13147    UInt opcode = INSN(16,12);
13148    UInt nn     = INSN(9,5);
13149    UInt dd     = INSN(4,0);
13150 
13151    if (size == BITS2(0,0)
13152        && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13153       /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13154       /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
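      /* These are implemented with a dirty helper: the AddRoundKey
         value (Vd ^ Vn) is split into two I64 halves for the call, and
         the helper returns the 128-bit result through the
         IRExpr_VECRET() slot. */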
13155       Bool   isD  = opcode == BITS5(0,0,1,0,1);
13156       IRTemp op1  = newTemp(Ity_V128);
13157       IRTemp op2  = newTemp(Ity_V128);
13158       IRTemp xord = newTemp(Ity_V128);
13159       IRTemp res  = newTemp(Ity_V128);
13160       void*        helper = isD ? &arm64g_dirtyhelper_AESD
13161                                 : &arm64g_dirtyhelper_AESE;
13162       const HChar* hname  = isD ? "arm64g_dirtyhelper_AESD"
13163                                 : "arm64g_dirtyhelper_AESE";
13164       assign(op1, getQReg128(dd));
13165       assign(op2, getQReg128(nn));
13166       assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
13167       IRDirty* di
13168          = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13169                               mkIRExprVec_3(
13170                                  IRExpr_VECRET(),
13171                                  unop(Iop_V128HIto64, mkexpr(xord)),
13172                                  unop(Iop_V128to64, mkexpr(xord)) ) );
13173       stmt(IRStmt_Dirty(di));
13174       putQReg128(dd, mkexpr(res));
13175       DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13176                                     nameQReg128(dd), nameQReg128(nn));
13177       return True;
13178    }
13179 
13180    if (size == BITS2(0,0)
13181        && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13182       /* -------- 00,00110: AESMC  Vd.16b, Vn.16b -------- */
13183       /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13184       Bool   isI  = opcode == BITS5(0,0,1,1,1);
13185       IRTemp src  = newTemp(Ity_V128);
13186       IRTemp res  = newTemp(Ity_V128);
13187       void*        helper = isI ? &arm64g_dirtyhelper_AESIMC
13188                                 : &arm64g_dirtyhelper_AESMC;
13189       const HChar* hname  = isI ? "arm64g_dirtyhelper_AESIMC"
13190                                 : "arm64g_dirtyhelper_AESMC";
13191       assign(src, getQReg128(nn));
13192       IRDirty* di
13193          = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13194                               mkIRExprVec_3(
13195                                  IRExpr_VECRET(),
13196                                  unop(Iop_V128HIto64, mkexpr(src)),
13197                                  unop(Iop_V128to64, mkexpr(src)) ) );
13198       stmt(IRStmt_Dirty(di));
13199       putQReg128(dd, mkexpr(res));
13200       DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13201                                     nameQReg128(dd), nameQReg128(nn));
13202       return True;
13203    }
13204 
13205    return False;
13206 #  undef INSN
13207 }
13208 
13209 
13210 static
13211 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13212 {
13213    /* 31   28   23 21 20 15 14  11 9 4
13214       0101 1110 sz 0  m  0  opc 00 n d
13215       Decode fields are: sz,opc
13216    */
13217 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13218    if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13219        || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13220       return False;
13221    }
13222    UInt sz  = INSN(23,22);
13223    UInt mm  = INSN(20,16);
13224    UInt opc = INSN(14,12);
13225    UInt nn  = INSN(9,5);
13226    UInt dd  = INSN(4,0);
13227    if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13228       /* -------- 00,000 SHA1C     Qd,    Sn,    Vm.4S -------- */
13229       /* -------- 00,001 SHA1P     Qd,    Sn,    Vm.4S -------- */
13230       /* -------- 00,010 SHA1M     Qd,    Sn,    Vm.4S -------- */
13231       /* -------- 00,011 SHA1SU0   Vd.4S, Vn.4S, Vm.4S -------- */
13232       /* -------- 00,100 SHA256H   Qd,    Qn,    Vm.4S -------- */
13233       /* -------- 00,101 SHA256H2  Qd,    Qn,    Vm.4S -------- */
13234       /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13235       vassert(opc < 7);
13236       const HChar* inames[7]
13237          = { "sha1c", "sha1p", "sha1m", "sha1su0",
13238              "sha256h", "sha256h2", "sha256su1" };
13239       void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13240          = { &arm64g_dirtyhelper_SHA1C,    &arm64g_dirtyhelper_SHA1P,
13241              &arm64g_dirtyhelper_SHA1M,    &arm64g_dirtyhelper_SHA1SU0,
13242              &arm64g_dirtyhelper_SHA256H,  &arm64g_dirtyhelper_SHA256H2,
13243              &arm64g_dirtyhelper_SHA256SU1 };
13244       const HChar* hnames[7]
13245          = { "arm64g_dirtyhelper_SHA1C",    "arm64g_dirtyhelper_SHA1P",
13246              "arm64g_dirtyhelper_SHA1M",    "arm64g_dirtyhelper_SHA1SU0",
13247              "arm64g_dirtyhelper_SHA256H",  "arm64g_dirtyhelper_SHA256H2",
13248              "arm64g_dirtyhelper_SHA256SU1" };
13249       IRTemp vD      = newTemp(Ity_V128);
13250       IRTemp vN      = newTemp(Ity_V128);
13251       IRTemp vM      = newTemp(Ity_V128);
13252       IRTemp vDhi    = newTemp(Ity_I64);
13253       IRTemp vDlo    = newTemp(Ity_I64);
13254       IRTemp vNhiPre = newTemp(Ity_I64);
13255       IRTemp vNloPre = newTemp(Ity_I64);
13256       IRTemp vNhi    = newTemp(Ity_I64);
13257       IRTemp vNlo    = newTemp(Ity_I64);
13258       IRTemp vMhi    = newTemp(Ity_I64);
13259       IRTemp vMlo    = newTemp(Ity_I64);
13260       assign(vD,      getQReg128(dd));
13261       assign(vN,      getQReg128(nn));
13262       assign(vM,      getQReg128(mm));
13263       assign(vDhi,    unop(Iop_V128HIto64, mkexpr(vD)));
13264       assign(vDlo,    unop(Iop_V128to64,   mkexpr(vD)));
13265       assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13266       assign(vNloPre, unop(Iop_V128to64,   mkexpr(vN)));
13267       assign(vMhi,    unop(Iop_V128HIto64, mkexpr(vM)));
13268       assign(vMlo,    unop(Iop_V128to64,   mkexpr(vM)));
13269       /* Mask off any bits of the N register operand that aren't actually
13270          needed, so that Memcheck doesn't complain unnecessarily. */
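      /* The masking idiom is 32Uto64(64to32(x)): it keeps only the low
         32 bits and makes the upper 32 defined zeroes, so for the SHA1
         hash ops, whose N operand is just Sn, undefinedness in the top
         bits of the register can't provoke false positives. */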
13271       switch (opc) {
13272          case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13273             assign(vNhi, mkU64(0));
13274             assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13275             break;
13276          case BITS3(0,1,1): case BITS3(1,0,0):
13277          case BITS3(1,0,1): case BITS3(1,1,0):
13278             assign(vNhi, mkexpr(vNhiPre));
13279             assign(vNlo, mkexpr(vNloPre));
13280             break;
13281          default:
13282             vassert(0);
13283       }
13284       IRTemp res = newTemp(Ity_V128);
13285       IRDirty* di
13286          = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13287                               mkIRExprVec_7(
13288                                  IRExpr_VECRET(),
13289                                  mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13290                                  mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13291       stmt(IRStmt_Dirty(di));
13292       putQReg128(dd, mkexpr(res));
13293       switch (opc) {
13294          case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13295             DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13296             break;
13297          case BITS3(0,1,1): case BITS3(1,1,0):
13298             DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13299             break;
13300          case BITS3(1,0,0): case BITS3(1,0,1):
13301             DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13302             break;
13303          default:
13304             vassert(0);
13305       }
13306       return True;
13307    }
13308 
13309    return False;
13310 #  undef INSN
13311 }
13312 
13313 
13314 static
13315 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13316 {
13317    /* 31   28   23 21    16  11 9 4
13318       0101 1110 sz 10100 opc 10 n d
13319       Decode fields are: sz,opc
13320    */
13321 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13322    if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13323        || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13324       return False;
13325    }
13326    UInt sz  = INSN(23,22);
13327    UInt opc = INSN(16,12);
13328    UInt nn  = INSN(9,5);
13329    UInt dd  = INSN(4,0);
13330    if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13331       /* -------- 00,00000 SHA1H     Sd,    Sn    -------- */
13332       /* -------- 00,00001 SHA1SU1   Vd.4S, Vn.4S -------- */
13333       /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13334       vassert(opc < 3);
13335       const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13336       IRTemp vD   = newTemp(Ity_V128);
13337       IRTemp vN   = newTemp(Ity_V128);
13338       IRTemp vDhi = newTemp(Ity_I64);
13339       IRTemp vDlo = newTemp(Ity_I64);
13340       IRTemp vNhi = newTemp(Ity_I64);
13341       IRTemp vNlo = newTemp(Ity_I64);
13342       assign(vD,   getQReg128(dd));
13343       assign(vN,   getQReg128(nn));
13344       assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13345       assign(vDlo, unop(Iop_V128to64,   mkexpr(vD)));
13346       assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13347       assign(vNlo, unop(Iop_V128to64,   mkexpr(vN)));
13348       /* Mask off any bits of the N register operand that aren't actually
13349          needed, so that Memcheck doesn't complain unnecessarily.  Also
13350          construct the calls, given that the helper functions don't take
13351          the same number of arguments. */
13352       IRDirty* di  = NULL;
13353       IRTemp   res = newTemp(Ity_V128);
13354       switch (opc) {
13355          case BITS5(0,0,0,0,0): {
13356             IRExpr* vNloMasked = unop(Iop_32Uto64,
13357                                       unop(Iop_64to32, mkexpr(vNlo)));
13358             di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13359                                     "arm64g_dirtyhelper_SHA1H",
13360                                     &arm64g_dirtyhelper_SHA1H,
13361                                     mkIRExprVec_3(
13362                                        IRExpr_VECRET(),
13363                                        mkU64(0), vNloMasked) );
13364             break;
13365          }
13366          case BITS5(0,0,0,0,1):
13367             di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13368                                     "arm64g_dirtyhelper_SHA1SU1",
13369                                     &arm64g_dirtyhelper_SHA1SU1,
13370                                     mkIRExprVec_5(
13371                                        IRExpr_VECRET(),
13372                                        mkexpr(vDhi), mkexpr(vDlo),
13373                                        mkexpr(vNhi), mkexpr(vNlo)) );
13374             break;
13375          case BITS5(0,0,0,1,0):
13376             di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13377                                     "arm64g_dirtyhelper_SHA256SU0",
13378                                     &arm64g_dirtyhelper_SHA256SU0,
13379                                     mkIRExprVec_5(
13380                                        IRExpr_VECRET(),
13381                                        mkexpr(vDhi), mkexpr(vDlo),
13382                                        mkexpr(vNhi), mkexpr(vNlo)) );
13383             break;
13384          default:
13385             vassert(0);
13386       }
13387       stmt(IRStmt_Dirty(di));
13388       putQReg128(dd, mkexpr(res));
13389       switch (opc) {
13390          case BITS5(0,0,0,0,0):
13391             DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13392             break;
13393          case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13394             DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13395             break;
13396          default:
13397             vassert(0);
13398       }
13399       return True;
13400    }
13401 
13402    return False;
13403 #  undef INSN
13404 }
13405 
13406 
13407 static
13408 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13409 {
13410    /* 31  28    23 21 20 15 13   9 4
13411       000 11110 ty 1  m  op 1000 n opcode2
13412       The first 3 bits are really "M 0 S", but M and S are always zero.
13413       Decode fields are: ty,op,opcode2
13414    */
13415 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13416    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13417        || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13418       return False;
13419    }
13420    UInt ty      = INSN(23,22);
13421    UInt mm      = INSN(20,16);
13422    UInt op      = INSN(15,14);
13423    UInt nn      = INSN(9,5);
13424    UInt opcode2 = INSN(4,0);
13425    vassert(ty < 4);
13426 
13427    if (ty <= X01 && op == X00
13428        && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13429       /* -------- 0x,00,00000 FCMP  d_d,   s_s -------- */
13430       /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
13431       /* -------- 0x,00,10000 FCMPE d_d,   s_s -------- */
13432       /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13433       /* 31        23   20    15      9 4
13434          000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
13435          000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
13436          000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
13437          000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0
13438 
13439          000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
13440          000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
13441          000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
13442          000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0
13443 
13444          FCMPE generates Invalid Operation exn if either arg is any kind
13445          of NaN.  FCMP generates Invalid Operation exn if either arg is a
13446          signalling NaN.  We ignore this detail here and produce the same
13447          IR for both.
13448       */
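      /* To illustrate the flag generation: Iop_CmpF64/32 produce an
         IRCmpF64Result, which mk_convert_IRCmpF64Result_to_NZCV maps to
         the architected FCMP flag settings -- 0110 for equal, 1000 for
         less than, 0010 for greater than, 0011 for unordered -- and
         that 4-bit value is shifted up to bits 31:28 before being
         handed to setFlags_COPY. */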
13449       Bool   isD     = (ty & 1) == 1;
13450       Bool   isCMPE  = (opcode2 & 16) == 16;
13451       Bool   cmpZero = (opcode2 & 8) == 8;
13452       IRType ity     = isD ? Ity_F64 : Ity_F32;
13453       Bool   valid   = True;
13454       if (cmpZero && mm != 0) valid = False;
13455       if (valid) {
13456          IRTemp argL  = newTemp(ity);
13457          IRTemp argR  = newTemp(ity);
13458          IRTemp irRes = newTemp(Ity_I32);
13459          assign(argL, getQRegLO(nn, ity));
13460          assign(argR,
13461                 cmpZero
13462                    ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13463                    : getQRegLO(mm, ity));
13464          assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13465                              mkexpr(argL), mkexpr(argR)));
13466          IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13467          IRTemp nzcv_28x0 = newTemp(Ity_I64);
13468          assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
13469          setFlags_COPY(nzcv_28x0);
13470          DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13471              cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13472          return True;
13473       }
13474       return False;
13475    }
13476 
13477    return False;
13478 #  undef INSN
13479 }
13480 
13481 
13482 static
13483 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13484 {
13485    /* 31  28    23 21 20 15   11 9 4  3
13486       000 11110 ty 1  m  cond 01 n op nzcv
13487       The first 3 bits are really "M 0 S", but M and S are always zero.
13488       Decode fields are: ty,op
13489    */
13490 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13491    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13492        || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13493       return False;
13494    }
13495    UInt ty   = INSN(23,22);
13496    UInt mm   = INSN(20,16);
13497    UInt cond = INSN(15,12);
13498    UInt nn   = INSN(9,5);
13499    UInt op   = INSN(4,4);
13500    UInt nzcv = INSN(3,0);
13501    vassert(ty < 4 && op <= 1);
13502 
13503    if (ty <= BITS2(0,1)) {
13504       /* -------- 00,0 FCCMP  s_s -------- */
13505       /* -------- 00,1 FCCMPE s_s -------- */
13506       /* -------- 01,0 FCCMP  d_d -------- */
13507       /* -------- 01,1 FCCMPE d_d -------- */
13508 
13509       /* FCCMPE generates Invalid Operation exn if either arg is any kind
13510          of NaN.  FCCMP generates Invalid Operation exn if either arg is a
13511          signalling NaN.  We ignore this detail here and produce the same
13512          IR for both.
13513       */
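      /* The scheme below: if the condition passes, NZCV comes from
         comparing Fn with Fm exactly as for FCMP; otherwise NZCV is
         simply the immediate.  E.g. "fccmp d0, d1, #0x4, ne" gives the
         d0/d1 comparison flags when NE holds and Z=1 (nzcv = 0100)
         when it doesn't. */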
13514       Bool   isD    = (ty & 1) == 1;
13515       Bool   isCMPE = op == 1;
13516       IRType ity    = isD ? Ity_F64 : Ity_F32;
13517       IRTemp argL   = newTemp(ity);
13518       IRTemp argR   = newTemp(ity);
13519       IRTemp irRes  = newTemp(Ity_I32);
13520       assign(argL,  getQRegLO(nn, ity));
13521       assign(argR,  getQRegLO(mm, ity));
13522       assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13523                           mkexpr(argL), mkexpr(argR)));
13524       IRTemp condT = newTemp(Ity_I1);
13525       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13526       IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13527 
13528       IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13529       assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
13530 
13531       IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
13532 
13533       IRTemp nzcv_28x0 = newTemp(Ity_I64);
13534       assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
13535                                    mkexpr(nzcvT_28x0), nzcvF_28x0));
13536       setFlags_COPY(nzcv_28x0);
13537       DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
13538           nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
13539       return True;
13540    }
13541 
13542    return False;
13543 #  undef INSN
13544 }
13545 
13546 
13547 static
13548 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
13549 {
13550    /* 31        23 21 20 15   11 9 5
13551       000 11110 ty 1  m  cond 11 n d
13552       The first 3 bits are really "M 0 S", but M and S are always zero.
13553       Decode fields: ty
13554    */
13555 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13556    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
13557        || INSN(11,10) != BITS2(1,1)) {
13558       return False;
13559    }
13560    UInt ty   = INSN(23,22);
13561    UInt mm   = INSN(20,16);
13562    UInt cond = INSN(15,12);
13563    UInt nn   = INSN(9,5);
13564    UInt dd   = INSN(4,0);
13565    if (ty <= X01) {
13566       /* -------- 00: FCSEL s_s -------- */
13567       /* -------- 01: FCSEL d_d -------- */
13568       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13569       IRTemp srcT = newTemp(ity);
13570       IRTemp srcF = newTemp(ity);
13571       IRTemp res  = newTemp(ity);
13572       assign(srcT, getQRegLO(nn, ity));
13573       assign(srcF, getQRegLO(mm, ity));
13574       assign(res, IRExpr_ITE(
13575                      unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
13576                      mkexpr(srcT), mkexpr(srcF)));
13577       putQReg128(dd, mkV128(0x0000));
13578       putQRegLO(dd, mkexpr(res));
13579       DIP("fcsel %s, %s, %s, %s\n",
13580           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
13581           nameCC(cond));
13582       return True;
13583    }
13584    return False;
13585 #  undef INSN
13586 }
13587 
13588 
13589 static
13590 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
13591 {
13592    /* 31  28    23 21 20     14    9 4
13593       000 11110 ty 1  opcode 10000 n d
13594       The first 3 bits are really "M 0 S", but M and S are always zero.
13595       Decode fields: ty,opcode
13596    */
13597 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13598    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13599        || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13600       return False;
13601    }
13602    UInt ty     = INSN(23,22);
13603    UInt opcode = INSN(20,15);
13604    UInt nn     = INSN(9,5);
13605    UInt dd     = INSN(4,0);
13606 
13607    if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13608       /* -------- 0x,000000: FMOV  d_d, s_s -------- */
13609       /* -------- 0x,000001: FABS  d_d, s_s -------- */
13610       /* -------- 0x,000010: FNEG  d_d, s_s -------- */
13611       /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13612       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13613       IRTemp src = newTemp(ity);
13614       IRTemp res = newTemp(ity);
13615       const HChar* nm = "??";
13616       assign(src, getQRegLO(nn, ity));
13617       switch (opcode) {
13618          case BITS6(0,0,0,0,0,0):
13619             nm = "fmov"; assign(res, mkexpr(src)); break;
13620          case BITS6(0,0,0,0,0,1):
13621             nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13622          case BITS6(0,0,0,0,1,0):
13623             nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13624          case BITS6(0,0,0,0,1,1):
13625             nm = "fsqrt";
13626             assign(res, binop(mkSQRTF(ity),
13627                               mkexpr(mk_get_IR_rounding_mode()),
13628                               mkexpr(src))); break;
13629          default:
13630             vassert(0);
13631       }
13632       putQReg128(dd, mkV128(0x0000));
13633       putQRegLO(dd, mkexpr(res));
13634       DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13635       return True;
13636    }
13637 
13638    if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13639                          || opcode == BITS6(0,0,0,1,0,1)))
13640        || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13641                          || opcode == BITS6(0,0,0,1,0,1)))
13642        || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13643                          || opcode == BITS6(0,0,0,1,0,0)))) {
13644       /* -------- 11,000100: FCVT s_h -------- */
13645       /* -------- 11,000101: FCVT d_h -------- */
13646       /* -------- 00,000111: FCVT h_s -------- */
13647       /* -------- 00,000101: FCVT d_s -------- */
13648       /* -------- 01,000111: FCVT h_d -------- */
13649       /* -------- 01,000100: FCVT s_d -------- */
13650       /* 31        23 21    16 14    9 4
13651          000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
13652          --------- 11 ----- 01 ---------   FCVT Dd, Hn
13653          --------- 00 ----- 11 ---------   FCVT Hd, Sn
13654          --------- 00 ----- 01 ---------   FCVT Dd, Sn
13655          --------- 01 ----- 11 ---------   FCVT Hd, Dn
13656          --------- 01 ----- 00 ---------   FCVT Sd, Dn
13657          Rounding, when dst is smaller than src, is per the FPCR.
13658       */
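      /* Hence a narrowing such as FCVT Sd, Dn becomes the binop
         Iop_F64toF32(rm, Dn) with rm fetched from FPCR, whereas the
         widenings (S->D, H->S, H->D) are exact and so use unops with
         no rounding mode argument. */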
13659       UInt b2322 = ty;
13660       UInt b1615 = opcode & BITS2(1,1);
13661       switch ((b2322 << 2) | b1615) {
13662          case BITS4(0,0,0,1):   // S -> D
13663          case BITS4(1,1,0,1): { // H -> D
13664             Bool   srcIsH = b2322 == BITS2(1,1);
13665             IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
13666             IRTemp res    = newTemp(Ity_F64);
13667             assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13668                              getQRegLO(nn, srcTy)));
13669             putQReg128(dd, mkV128(0x0000));
13670             putQRegLO(dd, mkexpr(res));
13671             DIP("fcvt %s, %s\n",
13672                 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13673             return True;
13674          }
13675          case BITS4(0,1,0,0):   // D -> S
13676          case BITS4(0,1,1,1): { // D -> H
13677             Bool   dstIsH = b1615 == BITS2(1,1);
13678             IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
13679             IRTemp res    = newTemp(dstTy);
13680             assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13681                               mkexpr(mk_get_IR_rounding_mode()),
13682                               getQRegLO(nn, Ity_F64)));
13683             putQReg128(dd, mkV128(0x0000));
13684             putQRegLO(dd, mkexpr(res));
13685             DIP("fcvt %s, %s\n",
13686                 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13687             return True;
13688          }
13689          case BITS4(0,0,1,1):   // S -> H
13690          case BITS4(1,1,0,0): { // H -> S
13691             Bool   toH   = b1615 == BITS2(1,1);
13692             IRType srcTy = toH ? Ity_F32 : Ity_F16;
13693             IRType dstTy = toH ? Ity_F16 : Ity_F32;
13694             IRTemp res = newTemp(dstTy);
13695             if (toH) {
13696                assign(res, binop(Iop_F32toF16,
13697                                  mkexpr(mk_get_IR_rounding_mode()),
13698                                  getQRegLO(nn, srcTy)));
13699 
13700             } else {
13701                assign(res, unop(Iop_F16toF32,
13702                                 getQRegLO(nn, srcTy)));
13703             }
13704             putQReg128(dd, mkV128(0x0000));
13705             putQRegLO(dd, mkexpr(res));
13706             DIP("fcvt %s, %s\n",
13707                 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13708             return True;
13709          }
13710          default:
13711             break;
13712       }
13713       /* else unhandled */
13714       return False;
13715    }
13716 
13717    if (ty <= X01
13718        && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13719        && opcode != BITS6(0,0,1,1,0,1)) {
13720       /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13721       /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13722       /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13723       /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13724       /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13725       /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13726       /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13727       /* 31        23 21   17  14    9 4
13728          000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
13729                            rm
13730          x==0 => S-registers, x==1 => D-registers
13731          rm (17:15) encodings:
13732             111 per FPCR  (FRINTI)
13733             001 +inf      (FRINTP)
13734             010 -inf      (FRINTM)
13735             011 zero      (FRINTZ)
13736             000 tieeven   (FRINTN) -- !! FIXME KLUDGED !!
13737             100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
13738             110 per FPCR + "exact = TRUE" (FRINTX)
13739             101 unallocated
13740       */
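      /* A concrete consequence of the FRINTA kludge: for an input of
         2.5, frinta should produce 3.0 (ties away from zero), but
         Irrm_NEAREST in practice resolves ties to even, so this
         implementation produces 2.0 instead. */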
13741       Bool    isD   = (ty & 1) == 1;
13742       UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
13743       IRType  ity   = isD ? Ity_F64 : Ity_F32;
13744       IRExpr* irrmE = NULL;
13745       UChar   ch    = '?';
13746       switch (rm) {
13747          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13748          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13749          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13750          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
13751          case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13752          // I am unsure about the following, due to the "integral exact"
13753          // description in the manual.  What does it mean? (frintx, that is)
13754          case BITS3(1,1,0):
13755             ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13756          case BITS3(1,1,1):
13757             ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13758          // The following is a kludge.  There's no Irrm_ value to represent
13759          // this ("to nearest, with ties to even")
13760          case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13761          default: break;
13762       }
13763       if (irrmE) {
13764          IRTemp src = newTemp(ity);
13765          IRTemp dst = newTemp(ity);
13766          assign(src, getQRegLO(nn, ity));
13767          assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13768                            irrmE, mkexpr(src)));
13769          putQReg128(dd, mkV128(0x0000));
13770          putQRegLO(dd, mkexpr(dst));
13771          DIP("frint%c %s, %s\n",
13772              ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13773          return True;
13774       }
13775       return False;
13776    }
13777 
13778    return False;
13779 #  undef INSN
13780 }
13781 
13782 
13783 static
13784 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13785 {
13786    /* 31  28    23 21 20 15     11 9 4
13787       000 11110 ty 1  m  opcode 10 n d
13788       The first 3 bits are really "M 0 S", but M and S are always zero.
13789       Decode fields: ty, opcode
13790    */
13791 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13792    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13793        || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13794       return False;
13795    }
13796    UInt ty     = INSN(23,22);
13797    UInt mm     = INSN(20,16);
13798    UInt opcode = INSN(15,12);
13799    UInt nn     = INSN(9,5);
13800    UInt dd     = INSN(4,0);
13801 
13802    if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13803       /* ------- 0x,0000: FMUL d_d, s_s ------- */
13804       /* ------- 0x,0001: FDIV d_d, s_s ------- */
13805       /* ------- 0x,0010: FADD d_d, s_s ------- */
13806       /* ------- 0x,0011: FSUB d_d, s_s ------- */
13807       /* ------- 0x,0100: FMAX d_d, s_s ------- */
13808       /* ------- 0x,0101: FMIN d_d, s_s ------- */
13809       /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13810       /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13811       IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13812       IROp   iop = Iop_INVALID;
13813       const HChar* nm = "???";
13814       switch (opcode) {
13815          case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13816          case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13817          case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13818          case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13819          case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13820          case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13821          case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13822          case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13823          default: vassert(0);
13824       }
13825       if (opcode <= BITS4(0,0,1,1)) {
13826          // This is really not good code.  TODO: avoid width-changing
13827          IRTemp res = newTemp(ity);
13828          assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13829                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13830          putQReg128(dd, mkV128(0));
13831          putQRegLO(dd, mkexpr(res));
13832       } else {
13833          putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13834                              binop(iop, getQReg128(nn), getQReg128(mm))));
13835       }
13836       DIP("%s %s, %s, %s\n",
13837           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13838       return True;
13839    }
13840 
13841    if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13842       /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13843       IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
13844       IROp   iop  = mkMULF(ity);
13845       IROp   iopn = mkNEGF(ity);
13846       const HChar* nm = "fnmul";
13847       IRExpr* resE = unop(iopn,
13848                           triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13849                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13850       IRTemp  res  = newTemp(ity);
13851       assign(res, resE);
13852       putQReg128(dd, mkV128(0));
13853       putQRegLO(dd, mkexpr(res));
13854       DIP("%s %s, %s, %s\n",
13855           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13856       return True;
13857    }
13858 
13859    return False;
13860 #  undef INSN
13861 }
13862 
13863 
13864 static
13865 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13866 {
13867    /* 31  28    23 21 20 15 14 9 4
13868       000 11111 ty o1 m  o0 a  n d
13869       The first 3 bits are really "M 0 S", but M and S are always zero.
13870       Decode fields: ty,o1,o0
13871    */
13872 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13873    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13874       return False;
13875    }
13876    UInt ty    = INSN(23,22);
13877    UInt bitO1 = INSN(21,21);
13878    UInt mm    = INSN(20,16);
13879    UInt bitO0 = INSN(15,15);
13880    UInt aa    = INSN(14,10);
13881    UInt nn    = INSN(9,5);
13882    UInt dd    = INSN(4,0);
13883    vassert(ty < 4);
13884 
13885    if (ty <= X01) {
13886       /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
13887       /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
13888       /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13889       /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13890       /* -------------------- F{N}M{ADD,SUB} -------------------- */
13891       /* 31          22   20 15 14 9 4   ix
13892          000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
13893          000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
13894          000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
13895          000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
13896          where Fx=Dx when sz=1, Fx=Sx when sz=0
13897 
13898                   -----SPEC------    ----IMPL----
13899          fmadd       a +    n * m    a + n * m
13900          fmsub       a + (-n) * m    a - n * m
13901          fnmadd   (-a) + (-n) * m    -(a + n * m)
13902          fnmsub   (-a) +    n * m    -(a - n * m)
13903       */
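      /* Two observable consequences of the unfused IMPL forms: the
         multiply and the add/sub each round, so the final result can
         differ from a true fused multiply-add in the last mantissa
         bit; and when a == n*m exactly, fnmsub here computes
         -(a - n*m) = -0.0 where the spec's (-a) + n*m would give +0.0
         under round-to-nearest. */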
13904       Bool    isD   = (ty & 1) == 1;
13905       UInt    ix    = (bitO1 << 1) | bitO0;
13906       IRType  ity   = isD ? Ity_F64 : Ity_F32;
13907       IROp    opADD = mkADDF(ity);
13908       IROp    opSUB = mkSUBF(ity);
13909       IROp    opMUL = mkMULF(ity);
13910       IROp    opNEG = mkNEGF(ity);
13911       IRTemp  res   = newTemp(ity);
13912       IRExpr* eA    = getQRegLO(aa, ity);
13913       IRExpr* eN    = getQRegLO(nn, ity);
13914       IRExpr* eM    = getQRegLO(mm, ity);
13915       IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
13916       IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
13917       switch (ix) {
13918          case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
13919          case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
13920          case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13921          case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13922          default: vassert(0);
13923       }
13924       putQReg128(dd, mkV128(0x0000));
13925       putQRegLO(dd, mkexpr(res));
13926       const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13927       DIP("%s %s, %s, %s, %s\n",
13928           names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13929                      nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13930       return True;
13931    }
13932 
13933    return False;
13934 #  undef INSN
13935 }
13936 
13937 
13938 static
13939 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13940 {
13941    /* 31  28    23 21 20   12  9    4
13942       000 11110 ty 1  imm8 100 imm5 d
13943       The first 3 bits are really "M 0 S", but M and S are always zero.
13944    */
13945 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13946    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13947        || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13948       return False;
13949    }
13950    UInt ty     = INSN(23,22);
13951    UInt imm8   = INSN(20,13);
13952    UInt imm5   = INSN(9,5);
13953    UInt dd     = INSN(4,0);
13954 
13955    /* ------- 00,00000: FMOV s_imm ------- */
13956    /* ------- 01,00000: FMOV d_imm ------- */
13957    if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13958       Bool  isD  = (ty & 1) == 1;
13959       ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
13960       if (!isD) {
13961          vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13962       }
13963       putQReg128(dd, mkV128(0));
13964       putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13965       DIP("fmov %s, #0x%llx\n",
13966           nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13967       return True;
13968    }
13969 
13970    return False;
13971 #  undef INSN
13972 }
13973 
13974 
13975 static
13976 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13977 {
13979    /* 31 30 29 28    23   21 20    18     15    9 4
13980       sf  0  0 11110 type 0  rmode opcode scale n d
13981       The first 3 bits are really "sf 0 S", but S is always zero.
13982       Decode fields: sf,type,rmode,opcode
13983    */
13984 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
13985    if (INSN(30,29) != BITS2(0,0)
13986        || INSN(28,24) != BITS5(1,1,1,1,0)
13987        || INSN(21,21) != 0) {
13988       return False;
13989    }
13990    UInt bitSF = INSN(31,31);
13991    UInt ty    = INSN(23,22); // type
13992    UInt rm    = INSN(20,19); // rmode
13993    UInt op    = INSN(18,16); // opcode
13994    UInt sc    = INSN(15,10); // scale
13995    UInt nn    = INSN(9,5);
13996    UInt dd    = INSN(4,0);
13997 
13998    if (ty <= X01 && rm == X11
13999        && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14000       /* -------- (ix) sf ty rm opc -------- */
14001       /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
14002       /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
14003       /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
14004       /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */
14005 
14006       /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
14007       /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
14008       /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
14009       /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
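      /* The conversion scales up and then truncates.  For example,
         fcvtzs w0, s0, #2 (scale field 62, hence fbits = 64-62 = 2)
         multiplies by 2.0^2 and converts with Irrm_ZERO, so an input
         of 2.75 yields 11. */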
14010       Bool isI64 = bitSF == 1;
14011       Bool isF64 = (ty & 1) == 1;
14012       Bool isU   = (op & 1) == 1;
14013       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14014 
14015       Int fbits = 64 - sc;
14016       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14017 
14018       Double  scale  = two_to_the_plus(fbits);
14019       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14020                              : IRExpr_Const(IRConst_F32( (Float)scale ));
14021       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
14022 
14023       const IROp ops[8]
14024         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14025             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14026       IRTemp irrm = newTemp(Ity_I32);
14027       assign(irrm, mkU32(Irrm_ZERO));
14028 
14029       IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14030       IRExpr* res = binop(ops[ix], mkexpr(irrm),
14031                                    triop(opMUL, mkexpr(irrm), src, scaleE));
14032       putIRegOrZR(isI64, dd, res);
14033 
14034       DIP("fcvtz%c %s, %s, #%d\n",
14035           isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14036           nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14037       return True;
14038    }
14039 
14040    /* ------ sf,ty,rm,opc ------ */
14041    /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
14042    /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
14043    /* (ix) sf  S 28    ty   rm opc 15    9 4
14044       0    0 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
14045       1    0 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
14046       2    1 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
14047       3    1 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits
14048 
14049       4    0 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
14050       5    0 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
14051       6    1 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
14052       7    1 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits
14053 
14054       These are signed/unsigned conversion from integer registers to
14055       FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14056       scaled per |scale|.
14057    */
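   /* For example, scvtf s0, w1, #2 converts the integer per FPCR and
      then multiplies by 2.0^-2, so w1 == 10 produces 2.5.  The final
      multiply is by an exact power of two, so (barring over/underflow)
      the Irrm_NEAREST passed to it introduces no extra rounding. */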
14058    if (ty <= X01 && rm == X00
14059        && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14060        && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14061       Bool isI64 = bitSF == 1;
14062       Bool isF64 = (ty & 1) == 1;
14063       Bool isU   = (op & 1) == 1;
14064       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14065 
14066       Int fbits = 64 - sc;
14067       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14068 
14069       Double  scale  = two_to_the_minus(fbits);
14070       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14071                              : IRExpr_Const(IRConst_F32( (Float)scale ));
14072       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
14073 
14074       const IROp ops[8]
14075         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14076             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14077       IRExpr* src = getIRegOrZR(isI64, nn);
14078       IRExpr* res = (isF64 && !isI64)
14079                        ? unop(ops[ix], src)
14080                        : binop(ops[ix],
14081                                mkexpr(mk_get_IR_rounding_mode()), src);
14082       putQReg128(dd, mkV128(0));
14083       putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
14084 
14085       DIP("%ccvtf %s, %s, #%d\n",
14086           isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14087           nameIRegOrZR(isI64, nn), fbits);
14088       return True;
14089    }
14090 
14091    return False;
14092 #  undef INSN
14093 }
14094 
14095 
14096 static
14097 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14098 {
14099    /* 31 30 29 28    23   21 20    18     15     9 4
14100       sf  0  0 11110 type 1  rmode opcode 000000 n d
14101       The first 3 bits are really "sf 0 S", but S is always zero.
14102       Decode fields: sf,type,rmode,opcode
14103    */
14104 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14105    if (INSN(30,29) != BITS2(0,0)
14106        || INSN(28,24) != BITS5(1,1,1,1,0)
14107        || INSN(21,21) != 1
14108        || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14109       return False;
14110    }
14111    UInt bitSF = INSN(31,31);
14112    UInt ty    = INSN(23,22); // type
14113    UInt rm    = INSN(20,19); // rmode
14114    UInt op    = INSN(18,16); // opcode
14115    UInt nn    = INSN(9,5);
14116    UInt dd    = INSN(4,0);
14117 
14118    // op = 000, 001
14119    /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14120    /*    30       23   20 18  15     9 4
14121       sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
14122       sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
14123       ---------------- 01 --------------  FCVTP-------- (round to +inf)
14124       ---------------- 10 --------------  FCVTM-------- (round to -inf)
14125       ---------------- 11 --------------  FCVTZ-------- (round to zero)
14126       ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
14127       ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)
14128 
14129       Rd is Xd when sf==1, Wd when sf==0
14130       Fn is Dn when x==1, Sn when x==0
14131       20:19 carry the rounding mode, using the same encoding as FPCR
14132    */
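   /* Contrast of the ties cases: fcvtns of 2.5 gives 2 (ties to even),
      while fcvtas should give 3 (ties away from zero).  As with FRINTA
      above, the 'a' forms are approximated with Irrm_NEAREST here, so
      both in fact produce 2 under this implementation. */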
14133    if (ty <= X01
14134        && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
14135            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
14136           )
14137       ) {
14138       Bool isI64 = bitSF == 1;
14139       Bool isF64 = (ty & 1) == 1;
14140       Bool isU   = (op & 1) == 1;
14141       /* Decide on the IR rounding mode to use. */
14142       IRRoundingMode irrm = 8; /*impossible*/
14143       HChar ch = '?';
14144       if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
14145          switch (rm) {
14146             case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
14147             case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
14148             case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
14149             case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
14150             default: vassert(0);
14151          }
14152       } else {
14153          vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
14154          switch (rm) {
14155             case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
14156             default: vassert(0);
14157          }
14158       }
14159       vassert(irrm != 8);
14160       /* Decide on the conversion primop, based on the source size,
14161          dest size and signedness (8 possibilities).  Case coding:
14162             F32 ->s I32   0
14163             F32 ->u I32   1
14164             F32 ->s I64   2
14165             F32 ->u I64   3
14166             F64 ->s I32   4
14167             F64 ->u I32   5
14168             F64 ->s I64   6
14169             F64 ->u I64   7
14170       */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
        /* validated */
      } else {
        return False;
      }
      IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src    = newTemp(srcTy);
      IRTemp dst    = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty   rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, in all four 32/64-bit combinations, rounded per FPCR.
   */
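   /* Illustrative worked example (ours, not from the original source):
      UCVTF Dd, Wn has isU=1, isI64=0, isF64=1, so ix = 4|0|1 = 5 and
      ops[5] == Iop_I32UtoF64.  Since I32 -> F64 is exact, no rounding
      mode is needed, and the unop path below is taken. */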
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S       ty   rm op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
   */
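   /* Note (an observation of ours, not in the original): cases (1) and
      (2) zero the whole Qd register before writing the low lane, whereas
      case (3) writes only the upper 64 bits of Vd and leaves the lower
      half untouched, as the putQRegHI64 call below shows. */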
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP |guest_PC_curr_instr|,
   which will have been set before the call here.  Returns True iff
   the instruction was decoded, in which case *dres will be set
   accordingly, or False, in which case *dres should be ignored by
   the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;
   dres->hint        = Dis_HintNone;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );
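
   /* For reference, a minimal sketch (our assumption, not upstream code)
      of what getUIntLittleEndianly, defined earlier in this file, is
      expected to compute -- an endianness-independent little-endian
      32-bit load:

         static UInt fetch32le ( const UChar* p )   // hypothetical name
         {
            return (UInt)p[0] | ((UInt)p[1] << 8)
                   | ((UInt)p[2] << 16) | ((UInt)p[3] << 24);
         }
   */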

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
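      /* Note (an observation of ours, not in the original): the four
         rotation amounts sum to 3+13+51+61 == 128, which is 0 mod 64,
         so the preamble leaves x12 unchanged -- that is why it is a
         no-op as far as registers are concerned. */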
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop
            // we're injecting here can change; in that case the
            // translation has to be redone.  For ease of handling, we
            // simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
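   /* Illustrative worked example (ours, not from the original source):
      insn 0x91000420 is "add x0, x1, #1".  INSN(28,25) extracts
      BITS4(1,0,0,0), so the switch below routes it to
      dis_ARM64_data_processing_immediate. */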
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn, abiinfo);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
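   /* Illustrative check of the trick above (ours, not in the original):
      for x == 1 the unsigned subtraction wraps to 0xFFFFFFFF, which
      fails "<= 15", so values below 2 are rejected along with values
      above 17. */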

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
                  = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
              if ((i & 7) == 0) buf[j++] = ' ';
              else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
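         /* Illustrative output of the loop above (ours, not from the
            original source): insn 0x91000420 is rendered as
            "1001'0001 0000'0000 0000'0100 0010'0000". */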
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/
