1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2017 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 /* KNOWN LIMITATIONS 2014-Nov-16
33
34 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
35
36 Also FP comparison "unordered" .. is implemented as normal FP
37 comparison.
38
39 Both should be fixed. They behave incorrectly in the presence of
40 NaNs.
41
42 FMULX is treated the same as FMUL. That's also not correct.
43
44    * Floating multiply-add (etc) insns are split into a multiply and
45 an add, and so suffer double rounding and hence sometimes the
46 least significant mantissa bit is incorrect. Fix: use the IR
47 multiply-add IROps instead.
48
49 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
50 handling for the "ties" case. FRINTX might be dubious too.
51
52 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
53 just rounds to nearest.
54 */
55
56 /* "Special" instructions.
57
58 This instruction decoder can decode four special instructions
59 which mean nothing natively (are no-ops as far as regs/mem are
60 concerned) but have meaning for supporting Valgrind. A special
61 instruction is flagged by a 16-byte preamble:
62
63 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
64 (ror x12, x12, #3; ror x12, x12, #13
65 ror x12, x12, #51; ror x12, x12, #61)
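   (These rotate amounts sum to 128, a multiple of 64, so the preamble
   leaves x12 unchanged -- hence it really is a no-op natively.)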
66
67    Following that, one of the following 4 is allowed
68 (standard interpretation in parentheses):
69
70 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
71 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
72 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
73 AA090129 (orr x9,x9,x9) IR injection
74
75 Any other bytes following the 16-byte preamble are illegal and
76 constitute a failure in instruction decoding. This all assumes
77 that the preamble will never occur except in specific code
78 fragments designed for Valgrind to catch.
79 */
80
81 /* Translates ARM64 code to IR. */
82
83 #include "libvex_basictypes.h"
84 #include "libvex_ir.h"
85 #include "libvex.h"
86 #include "libvex_guest_arm64.h"
87
88 #include "main_util.h"
89 #include "main_globals.h"
90 #include "guest_generic_bb_to_IR.h"
91 #include "guest_arm64_defs.h"
92
93
94 /*------------------------------------------------------------*/
95 /*--- Globals ---*/
96 /*------------------------------------------------------------*/
97
98 /* These are set at the start of the translation of an instruction, so
99 that we don't have to pass them around endlessly. CONST means does
100 not change during translation of the instruction.
101 */
102
103 /* CONST: what is the host's endianness? We need to know this in
104 order to do sub-register accesses to the SIMD/FP registers
105 correctly. */
106 static VexEndness host_endness;
107
108 /* CONST: The guest address for the instruction currently being
109 translated. */
110 static Addr64 guest_PC_curr_instr;
111
112 /* MOD: The IRSB* into which we're generating code. */
113 static IRSB* irsb;
114
115
116 /*------------------------------------------------------------*/
117 /*--- Debugging output ---*/
118 /*------------------------------------------------------------*/
119
120 #define DIP(format, args...) \
121 if (vex_traceflags & VEX_TRACE_FE) \
122 vex_printf(format, ## args)
123
124 #define DIS(buf, format, args...) \
125 if (vex_traceflags & VEX_TRACE_FE) \
126 vex_sprintf(buf, format, ## args)
127
128
129 /*------------------------------------------------------------*/
130 /*--- Helper bits and pieces for deconstructing the ---*/
131 /*--- arm insn stream. ---*/
132 /*------------------------------------------------------------*/
133
134 /* Do a little-endian load of a 32-bit word, regardless of the
135 endianness of the underlying host. */
136 static inline UInt getUIntLittleEndianly ( const UChar* p )
137 {
138 UInt w = 0;
139 w = (w << 8) | p[3];
140 w = (w << 8) | p[2];
141 w = (w << 8) | p[1];
142 w = (w << 8) | p[0];
143 return w;
144 }
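/* Example: the byte sequence 0x78 0x56 0x34 0x12 yields 0x12345678. */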
145
146 /* Sign extend an N-bit value up to 64 bits, by copying
147 bit N-1 into all higher positions. */
148 static ULong sx_to_64 ( ULong x, UInt n )
149 {
150 vassert(n > 1 && n < 64);
151 x <<= (64-n);
152 Long r = (Long)x;
153 r >>= (64-n);
154 return (ULong)r;
155 }
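/* Example: sx_to_64(0x20, 6) == 0xFFFFFFFFFFFFFFE0, since bit 5 (the
   sign bit of a 6-bit value) is set and gets copied upwards. */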
156
157 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
158 //ZZ endianness of the underlying host. */
159 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
160 //ZZ {
161 //ZZ UShort w = 0;
162 //ZZ w = (w << 8) | p[1];
163 //ZZ w = (w << 8) | p[0];
164 //ZZ return w;
165 //ZZ }
166 //ZZ
167 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
168 //ZZ vassert(sh >= 0 && sh < 32);
169 //ZZ if (sh == 0)
170 //ZZ return x;
171 //ZZ else
172 //ZZ return (x << (32-sh)) | (x >> sh);
173 //ZZ }
174 //ZZ
175 //ZZ static Int popcount32 ( UInt x )
176 //ZZ {
177 //ZZ Int res = 0, i;
178 //ZZ for (i = 0; i < 32; i++) {
179 //ZZ res += (x & 1);
180 //ZZ x >>= 1;
181 //ZZ }
182 //ZZ return res;
183 //ZZ }
184 //ZZ
185 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
186 //ZZ {
187 //ZZ UInt mask = 1 << ix;
188 //ZZ x &= ~mask;
189 //ZZ x |= ((b << ix) & mask);
190 //ZZ return x;
191 //ZZ }
192
193 #define BITS2(_b1,_b0) \
194 (((_b1) << 1) | (_b0))
195
196 #define BITS3(_b2,_b1,_b0) \
197 (((_b2) << 2) | ((_b1) << 1) | (_b0))
198
199 #define BITS4(_b3,_b2,_b1,_b0) \
200 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
201
202 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
203 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
204 | BITS4((_b3),(_b2),(_b1),(_b0)))
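/* Example: BITS8(1,1,0,1,0,1,0,0) == 0xD4 (binary 11010100). */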
205
206 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
207 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
208 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
209 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
210 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
211 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
212
213 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
214 (((_b8) << 8) \
215 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
216
217 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
218 (((_b9) << 9) | ((_b8) << 8) \
219 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
220
221 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
222 (((_b10) << 10) \
223 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
224
225 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
226 (((_b11) << 11) \
227 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
228
229 #define X00 BITS2(0,0)
230 #define X01 BITS2(0,1)
231 #define X10 BITS2(1,0)
232 #define X11 BITS2(1,1)
233
234 // produces _uint[_bMax:_bMin]
235 #define SLICE_UInt(_uint,_bMax,_bMin) \
236 (( ((UInt)(_uint)) >> (_bMin)) \
237 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
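/* Example: SLICE_UInt(0xABCD, 11, 4) == 0xBC. */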
238
239
240 /*------------------------------------------------------------*/
241 /*--- Helper bits and pieces for creating IR fragments. ---*/
242 /*------------------------------------------------------------*/
243
244 static IRExpr* mkV128 ( UShort w )
245 {
246 return IRExpr_Const(IRConst_V128(w));
247 }
248
249 static IRExpr* mkU64 ( ULong i )
250 {
251 return IRExpr_Const(IRConst_U64(i));
252 }
253
254 static IRExpr* mkU32 ( UInt i )
255 {
256 return IRExpr_Const(IRConst_U32(i));
257 }
258
259 static IRExpr* mkU16 ( UInt i )
260 {
261 vassert(i < 65536);
262 return IRExpr_Const(IRConst_U16(i));
263 }
264
265 static IRExpr* mkU8 ( UInt i )
266 {
267 vassert(i < 256);
268 return IRExpr_Const(IRConst_U8( (UChar)i ));
269 }
270
271 static IRExpr* mkexpr ( IRTemp tmp )
272 {
273 return IRExpr_RdTmp(tmp);
274 }
275
276 static IRExpr* unop ( IROp op, IRExpr* a )
277 {
278 return IRExpr_Unop(op, a);
279 }
280
281 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
282 {
283 return IRExpr_Binop(op, a1, a2);
284 }
285
286 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
287 {
288 return IRExpr_Triop(op, a1, a2, a3);
289 }
290
291 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
292 {
293 return IRExpr_Load(Iend_LE, ty, addr);
294 }
295
296 /* Add a statement to the list held by "irsb". */
297 static void stmt ( IRStmt* st )
298 {
299 addStmtToIRSB( irsb, st );
300 }
301
302 static void assign ( IRTemp dst, IRExpr* e )
303 {
304 stmt( IRStmt_WrTmp(dst, e) );
305 }
306
307 static void storeLE ( IRExpr* addr, IRExpr* data )
308 {
309 stmt( IRStmt_Store(Iend_LE, addr, data) );
310 }
311
312 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
313 //ZZ {
314 //ZZ if (guardT == IRTemp_INVALID) {
315 //ZZ /* unconditional */
316 //ZZ storeLE(addr, data);
317 //ZZ } else {
318 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
319 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
320 //ZZ }
321 //ZZ }
322 //ZZ
323 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
324 //ZZ IRExpr* addr, IRExpr* alt,
325 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
326 //ZZ {
327 //ZZ if (guardT == IRTemp_INVALID) {
328 //ZZ /* unconditional */
329 //ZZ IRExpr* loaded = NULL;
330 //ZZ switch (cvt) {
331 //ZZ case ILGop_Ident32:
332 //ZZ loaded = loadLE(Ity_I32, addr); break;
333 //ZZ case ILGop_8Uto32:
334 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
335 //ZZ case ILGop_8Sto32:
336 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
337 //ZZ case ILGop_16Uto32:
338 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
339 //ZZ case ILGop_16Sto32:
340 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
341 //ZZ default:
342 //ZZ vassert(0);
343 //ZZ }
344 //ZZ vassert(loaded != NULL);
345 //ZZ assign(dst, loaded);
346 //ZZ } else {
347 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
348 //ZZ loaded data before putting the data in 'dst'. If the load
349 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
350 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
351 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
352 //ZZ }
353 //ZZ }
354
355 /* Generate a new temporary of the given type. */
356 static IRTemp newTemp ( IRType ty )
357 {
358 vassert(isPlausibleIRType(ty));
359 return newIRTemp( irsb->tyenv, ty );
360 }
361
362 /* This is used in many places, so the brevity is an advantage. */
363 static IRTemp newTempV128(void)
364 {
365 return newTemp(Ity_V128);
366 }
367
368 /* Initialise V128 temporaries en masse. */
369 static
370 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
371 {
372 vassert(t1 && *t1 == IRTemp_INVALID);
373 vassert(t2 && *t2 == IRTemp_INVALID);
374 *t1 = newTempV128();
375 *t2 = newTempV128();
376 }
377
378 static
379 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
380 {
381 vassert(t1 && *t1 == IRTemp_INVALID);
382 vassert(t2 && *t2 == IRTemp_INVALID);
383 vassert(t3 && *t3 == IRTemp_INVALID);
384 *t1 = newTempV128();
385 *t2 = newTempV128();
386 *t3 = newTempV128();
387 }
388
389 static
390 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
391 {
392 vassert(t1 && *t1 == IRTemp_INVALID);
393 vassert(t2 && *t2 == IRTemp_INVALID);
394 vassert(t3 && *t3 == IRTemp_INVALID);
395 vassert(t4 && *t4 == IRTemp_INVALID);
396 *t1 = newTempV128();
397 *t2 = newTempV128();
398 *t3 = newTempV128();
399 *t4 = newTempV128();
400 }
401
402 static
403 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
404 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
405 {
406 vassert(t1 && *t1 == IRTemp_INVALID);
407 vassert(t2 && *t2 == IRTemp_INVALID);
408 vassert(t3 && *t3 == IRTemp_INVALID);
409 vassert(t4 && *t4 == IRTemp_INVALID);
410 vassert(t5 && *t5 == IRTemp_INVALID);
411 vassert(t6 && *t6 == IRTemp_INVALID);
412 vassert(t7 && *t7 == IRTemp_INVALID);
413 *t1 = newTempV128();
414 *t2 = newTempV128();
415 *t3 = newTempV128();
416 *t4 = newTempV128();
417 *t5 = newTempV128();
418 *t6 = newTempV128();
419 *t7 = newTempV128();
420 }
421
422 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
423 //ZZ IRRoundingMode. */
424 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
425 //ZZ {
426 //ZZ return mkU32(Irrm_NEAREST);
427 //ZZ }
428 //ZZ
429 //ZZ /* Generate an expression for SRC rotated right by ROT. */
430 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
431 //ZZ {
432 //ZZ vassert(rot >= 0 && rot < 32);
433 //ZZ if (rot == 0)
434 //ZZ return mkexpr(src);
435 //ZZ return
436 //ZZ binop(Iop_Or32,
437 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
438 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
439 //ZZ }
440 //ZZ
441 //ZZ static IRExpr* mkU128 ( ULong i )
442 //ZZ {
443 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
444 //ZZ }
445 //ZZ
446 //ZZ /* Generate a 4-aligned version of the given expression if
447 //ZZ the given condition is true. Else return it unchanged. */
448 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
449 //ZZ {
450 //ZZ if (b)
451 //ZZ return binop(Iop_And32, e, mkU32(~3));
452 //ZZ else
453 //ZZ return e;
454 //ZZ }
455
456 /* Other IR construction helpers. */
457 static IROp mkAND ( IRType ty ) {
458 switch (ty) {
459 case Ity_I32: return Iop_And32;
460 case Ity_I64: return Iop_And64;
461 default: vpanic("mkAND");
462 }
463 }
464
465 static IROp mkOR ( IRType ty ) {
466 switch (ty) {
467 case Ity_I32: return Iop_Or32;
468 case Ity_I64: return Iop_Or64;
469 default: vpanic("mkOR");
470 }
471 }
472
473 static IROp mkXOR ( IRType ty ) {
474 switch (ty) {
475 case Ity_I32: return Iop_Xor32;
476 case Ity_I64: return Iop_Xor64;
477 default: vpanic("mkXOR");
478 }
479 }
480
481 static IROp mkSHL ( IRType ty ) {
482 switch (ty) {
483 case Ity_I32: return Iop_Shl32;
484 case Ity_I64: return Iop_Shl64;
485 default: vpanic("mkSHL");
486 }
487 }
488
489 static IROp mkSHR ( IRType ty ) {
490 switch (ty) {
491 case Ity_I32: return Iop_Shr32;
492 case Ity_I64: return Iop_Shr64;
493 default: vpanic("mkSHR");
494 }
495 }
496
497 static IROp mkSAR ( IRType ty ) {
498 switch (ty) {
499 case Ity_I32: return Iop_Sar32;
500 case Ity_I64: return Iop_Sar64;
501 default: vpanic("mkSAR");
502 }
503 }
504
505 static IROp mkNOT ( IRType ty ) {
506 switch (ty) {
507 case Ity_I32: return Iop_Not32;
508 case Ity_I64: return Iop_Not64;
509 default: vpanic("mkNOT");
510 }
511 }
512
513 static IROp mkADD ( IRType ty ) {
514 switch (ty) {
515 case Ity_I32: return Iop_Add32;
516 case Ity_I64: return Iop_Add64;
517 default: vpanic("mkADD");
518 }
519 }
520
521 static IROp mkSUB ( IRType ty ) {
522 switch (ty) {
523 case Ity_I32: return Iop_Sub32;
524 case Ity_I64: return Iop_Sub64;
525 default: vpanic("mkSUB");
526 }
527 }
528
529 static IROp mkADDF ( IRType ty ) {
530 switch (ty) {
531 case Ity_F32: return Iop_AddF32;
532 case Ity_F64: return Iop_AddF64;
533 default: vpanic("mkADDF");
534 }
535 }
536
537 static IROp mkSUBF ( IRType ty ) {
538 switch (ty) {
539 case Ity_F32: return Iop_SubF32;
540 case Ity_F64: return Iop_SubF64;
541 default: vpanic("mkSUBF");
542 }
543 }
544
545 static IROp mkMULF ( IRType ty ) {
546 switch (ty) {
547 case Ity_F32: return Iop_MulF32;
548 case Ity_F64: return Iop_MulF64;
549 default: vpanic("mkMULF");
550 }
551 }
552
553 static IROp mkDIVF ( IRType ty ) {
554 switch (ty) {
555 case Ity_F32: return Iop_DivF32;
556 case Ity_F64: return Iop_DivF64;
557 default: vpanic("mkDIVF");
558 }
559 }
560
561 static IROp mkNEGF ( IRType ty ) {
562 switch (ty) {
563 case Ity_F32: return Iop_NegF32;
564 case Ity_F64: return Iop_NegF64;
565 default: vpanic("mkNEGF");
566 }
567 }
568
569 static IROp mkABSF ( IRType ty ) {
570 switch (ty) {
571 case Ity_F32: return Iop_AbsF32;
572 case Ity_F64: return Iop_AbsF64;
573 default: vpanic("mkABSF");
574 }
575 }
576
577 static IROp mkSQRTF ( IRType ty ) {
578 switch (ty) {
579 case Ity_F32: return Iop_SqrtF32;
580 case Ity_F64: return Iop_SqrtF64;
581 default: vpanic("mkSQRTF");
582 }
583 }
584
585 static IROp mkVecADD ( UInt size ) {
586 const IROp ops[4]
587 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
588 vassert(size < 4);
589 return ops[size];
590 }
591
592 static IROp mkVecQADDU ( UInt size ) {
593 const IROp ops[4]
594 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
595 vassert(size < 4);
596 return ops[size];
597 }
598
599 static IROp mkVecQADDS ( UInt size ) {
600 const IROp ops[4]
601 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
602 vassert(size < 4);
603 return ops[size];
604 }
605
606 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
607 const IROp ops[4]
608 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
609 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
610 vassert(size < 4);
611 return ops[size];
612 }
613
614 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
615 const IROp ops[4]
616 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
617 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
618 vassert(size < 4);
619 return ops[size];
620 }
621
622 static IROp mkVecSUB ( UInt size ) {
623 const IROp ops[4]
624 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
625 vassert(size < 4);
626 return ops[size];
627 }
628
629 static IROp mkVecQSUBU ( UInt size ) {
630 const IROp ops[4]
631 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
632 vassert(size < 4);
633 return ops[size];
634 }
635
636 static IROp mkVecQSUBS ( UInt size ) {
637 const IROp ops[4]
638 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
639 vassert(size < 4);
640 return ops[size];
641 }
642
643 static IROp mkVecSARN ( UInt size ) {
644 const IROp ops[4]
645 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
646 vassert(size < 4);
647 return ops[size];
648 }
649
650 static IROp mkVecSHRN ( UInt size ) {
651 const IROp ops[4]
652 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
653 vassert(size < 4);
654 return ops[size];
655 }
656
657 static IROp mkVecSHLN ( UInt size ) {
658 const IROp ops[4]
659 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
660 vassert(size < 4);
661 return ops[size];
662 }
663
664 static IROp mkVecCATEVENLANES ( UInt size ) {
665 const IROp ops[4]
666 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
667 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
668 vassert(size < 4);
669 return ops[size];
670 }
671
672 static IROp mkVecCATODDLANES ( UInt size ) {
673 const IROp ops[4]
674 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
675 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
676 vassert(size < 4);
677 return ops[size];
678 }
679
680 static IROp mkVecINTERLEAVELO ( UInt size ) {
681 const IROp ops[4]
682 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
683 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
684 vassert(size < 4);
685 return ops[size];
686 }
687
688 static IROp mkVecINTERLEAVEHI ( UInt size ) {
689 const IROp ops[4]
690 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
691 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
692 vassert(size < 4);
693 return ops[size];
694 }
695
696 static IROp mkVecMAXU ( UInt size ) {
697 const IROp ops[4]
698 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
699 vassert(size < 4);
700 return ops[size];
701 }
702
703 static IROp mkVecMAXS ( UInt size ) {
704 const IROp ops[4]
705 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
706 vassert(size < 4);
707 return ops[size];
708 }
709
710 static IROp mkVecMINU ( UInt size ) {
711 const IROp ops[4]
712 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
713 vassert(size < 4);
714 return ops[size];
715 }
716
717 static IROp mkVecMINS ( UInt size ) {
718 const IROp ops[4]
719 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
720 vassert(size < 4);
721 return ops[size];
722 }
723
724 static IROp mkVecMUL ( UInt size ) {
725 const IROp ops[4]
726 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
727 vassert(size < 3);
728 return ops[size];
729 }
730
731 static IROp mkVecMULLU ( UInt sizeNarrow ) {
732 const IROp ops[4]
733 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
734 vassert(sizeNarrow < 3);
735 return ops[sizeNarrow];
736 }
737
738 static IROp mkVecMULLS ( UInt sizeNarrow ) {
739 const IROp ops[4]
740 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
741 vassert(sizeNarrow < 3);
742 return ops[sizeNarrow];
743 }
744
745 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
746 const IROp ops[4]
747 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
748 vassert(sizeNarrow < 3);
749 return ops[sizeNarrow];
750 }
751
752 static IROp mkVecCMPEQ ( UInt size ) {
753 const IROp ops[4]
754 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
755 vassert(size < 4);
756 return ops[size];
757 }
758
759 static IROp mkVecCMPGTU ( UInt size ) {
760 const IROp ops[4]
761 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
762 vassert(size < 4);
763 return ops[size];
764 }
765
766 static IROp mkVecCMPGTS ( UInt size ) {
767 const IROp ops[4]
768 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
769 vassert(size < 4);
770 return ops[size];
771 }
772
773 static IROp mkVecABS ( UInt size ) {
774 const IROp ops[4]
775 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
776 vassert(size < 4);
777 return ops[size];
778 }
779
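/* Select a "zero the high lanes" op: for size 0..3 the result keeps only
   the low 8/16/32/64 bits of the V128 value and zeroes the rest. */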
780 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
781 const IROp ops[4]
782 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
783 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
784 vassert(size < 4);
785 return ops[size];
786 }
787
788 static IRExpr* mkU ( IRType ty, ULong imm ) {
789 switch (ty) {
790 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
791 case Ity_I64: return mkU64(imm);
792 default: vpanic("mkU");
793 }
794 }
795
796 static IROp mkVecQDMULHIS ( UInt size ) {
797 const IROp ops[4]
798 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
799 vassert(size < 4);
800 return ops[size];
801 }
802
803 static IROp mkVecQRDMULHIS ( UInt size ) {
804 const IROp ops[4]
805 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
806 vassert(size < 4);
807 return ops[size];
808 }
809
810 static IROp mkVecQANDUQSH ( UInt size ) {
811 const IROp ops[4]
812 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
813 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
814 vassert(size < 4);
815 return ops[size];
816 }
817
818 static IROp mkVecQANDSQSH ( UInt size ) {
819 const IROp ops[4]
820 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
821 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
822 vassert(size < 4);
823 return ops[size];
824 }
825
826 static IROp mkVecQANDUQRSH ( UInt size ) {
827 const IROp ops[4]
828 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
829 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
830 vassert(size < 4);
831 return ops[size];
832 }
833
834 static IROp mkVecQANDSQRSH ( UInt size ) {
835 const IROp ops[4]
836 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
837 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
838 vassert(size < 4);
839 return ops[size];
840 }
841
842 static IROp mkVecSHU ( UInt size ) {
843 const IROp ops[4]
844 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
845 vassert(size < 4);
846 return ops[size];
847 }
848
849 static IROp mkVecSHS ( UInt size ) {
850 const IROp ops[4]
851 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
852 vassert(size < 4);
853 return ops[size];
854 }
855
856 static IROp mkVecRSHU ( UInt size ) {
857 const IROp ops[4]
858 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
859 vassert(size < 4);
860 return ops[size];
861 }
862
863 static IROp mkVecRSHS ( UInt size ) {
864 const IROp ops[4]
865 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
866 vassert(size < 4);
867 return ops[size];
868 }
869
870 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
871 const IROp ops[4]
872 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
873 Iop_NarrowUn64to32x2, Iop_INVALID };
874 vassert(sizeNarrow < 4);
875 return ops[sizeNarrow];
876 }
877
878 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
879 const IROp ops[4]
880 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
881 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
882 vassert(sizeNarrow < 4);
883 return ops[sizeNarrow];
884 }
885
886 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
887 const IROp ops[4]
888 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
889 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
890 vassert(sizeNarrow < 4);
891 return ops[sizeNarrow];
892 }
893
894 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
895 const IROp ops[4]
896 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
897 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
898 vassert(sizeNarrow < 4);
899 return ops[sizeNarrow];
900 }
901
902 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
903 const IROp ops[4]
904 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
905 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
906 vassert(sizeNarrow < 4);
907 return ops[sizeNarrow];
908 }
909
910 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
911 const IROp ops[4]
912 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
913 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
914 vassert(sizeNarrow < 4);
915 return ops[sizeNarrow];
916 }
917
918 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
919 const IROp ops[4]
920 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
921 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
922 vassert(sizeNarrow < 4);
923 return ops[sizeNarrow];
924 }
925
926 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
927 const IROp ops[4]
928 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
929 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
930 vassert(sizeNarrow < 4);
931 return ops[sizeNarrow];
932 }
933
934 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
935 const IROp ops[4]
936 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
937 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
938 vassert(sizeNarrow < 4);
939 return ops[sizeNarrow];
940 }
941
942 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
943 const IROp ops[4]
944 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
945 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
946 vassert(sizeNarrow < 4);
947 return ops[sizeNarrow];
948 }
949
950 static IROp mkVecQSHLNSATUU ( UInt size ) {
951 const IROp ops[4]
952 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
953 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
954 vassert(size < 4);
955 return ops[size];
956 }
957
958 static IROp mkVecQSHLNSATSS ( UInt size ) {
959 const IROp ops[4]
960 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
961 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
962 vassert(size < 4);
963 return ops[size];
964 }
965
966 static IROp mkVecQSHLNSATSU ( UInt size ) {
967 const IROp ops[4]
968 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
969 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
970 vassert(size < 4);
971 return ops[size];
972 }
973
974 static IROp mkVecADDF ( UInt size ) {
975 const IROp ops[4]
976 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
977 vassert(size < 4);
978 return ops[size];
979 }
980
981 static IROp mkVecMAXF ( UInt size ) {
982 const IROp ops[4]
983 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
984 vassert(size < 4);
985 return ops[size];
986 }
987
988 static IROp mkVecMINF ( UInt size ) {
989 const IROp ops[4]
990 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
991 vassert(size < 4);
992 return ops[size];
993 }
994
995 /* Generate IR to create 'arg rotated right by imm', for sane values
996 of 'ty' and 'imm'. */
997 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
998 {
999 UInt w = 0;
1000 if (ty == Ity_I64) {
1001 w = 64;
1002 } else {
1003 vassert(ty == Ity_I32);
1004 w = 32;
1005 }
1006 vassert(w != 0);
1007 vassert(imm < w);
1008 if (imm == 0) {
1009 return arg;
1010 }
1011 IRTemp res = newTemp(ty);
1012 assign(res, binop(mkOR(ty),
1013 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1014 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1015 return res;
1016 }
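/* Example: mathROR(Ity_I32, t, 8) maps 0x11223344 to 0x44112233. */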
1017
1018 /* Generate IR to set the returned temp to either all-zeroes or
1019 all ones, as a copy of arg<imm>. */
1020 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1021 {
1022 UInt w = 0;
1023 if (ty == Ity_I64) {
1024 w = 64;
1025 } else {
1026 vassert(ty == Ity_I32);
1027 w = 32;
1028 }
1029 vassert(w != 0);
1030 vassert(imm < w);
1031 IRTemp res = newTemp(ty);
1032 assign(res, binop(mkSAR(ty),
1033 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1034 mkU8(w - 1)));
1035 return res;
1036 }
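/* Example: mathREPLICATE(Ity_I32, t, 3) yields 0xFFFFFFFF if bit 3 of t
   is set, and 0 otherwise. */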
1037
1038 /* U-widen 8/16/32/64 bit int expr to 64. */
1039 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1040 {
1041 switch (srcTy) {
1042 case Ity_I64: return e;
1043 case Ity_I32: return unop(Iop_32Uto64, e);
1044 case Ity_I16: return unop(Iop_16Uto64, e);
1045 case Ity_I8: return unop(Iop_8Uto64, e);
1046 default: vpanic("widenUto64(arm64)");
1047 }
1048 }
1049
1050 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1051 of these combinations make sense. */
1052 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1053 {
1054 switch (dstTy) {
1055 case Ity_I64: return e;
1056 case Ity_I32: return unop(Iop_64to32, e);
1057 case Ity_I16: return unop(Iop_64to16, e);
1058 case Ity_I8: return unop(Iop_64to8, e);
1059 default: vpanic("narrowFrom64(arm64)");
1060 }
1061 }
1062
1063
1064 /*------------------------------------------------------------*/
1065 /*--- Helpers for accessing guest registers. ---*/
1066 /*------------------------------------------------------------*/
1067
1068 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1069 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1070 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1071 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1072 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1073 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1074 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1075 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1076 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1077 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1078 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1079 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1080 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1081 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1082 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1083 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1084 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1085 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1086 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1087 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1088 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1089 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1090 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1091 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1092 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1093 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1094 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1095 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1096 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1097 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1098 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1099
1100 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1101 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1102
1103 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1104 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1105 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1106 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1107
1108 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1109 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1110
1111 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1112 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1113 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1114 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1115 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1116 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1117 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1118 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1119 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1120 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1121 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1122 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1123 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1124 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1125 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1126 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1127 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1128 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1129 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1130 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1131 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1132 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1133 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1134 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1135 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1136 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1137 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1138 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1139 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1140 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1141 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1142 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1143
1144 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1145 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1146
1147 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1148 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1149
1150 #define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
1151 #define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
1152 #define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
1153
1154
1155 /* ---------------- Integer registers ---------------- */
1156
1157 static Int offsetIReg64 ( UInt iregNo )
1158 {
1159 /* Do we care about endianness here? We do if sub-parts of integer
1160 registers are accessed. */
1161 switch (iregNo) {
1162 case 0: return OFFB_X0;
1163 case 1: return OFFB_X1;
1164 case 2: return OFFB_X2;
1165 case 3: return OFFB_X3;
1166 case 4: return OFFB_X4;
1167 case 5: return OFFB_X5;
1168 case 6: return OFFB_X6;
1169 case 7: return OFFB_X7;
1170 case 8: return OFFB_X8;
1171 case 9: return OFFB_X9;
1172 case 10: return OFFB_X10;
1173 case 11: return OFFB_X11;
1174 case 12: return OFFB_X12;
1175 case 13: return OFFB_X13;
1176 case 14: return OFFB_X14;
1177 case 15: return OFFB_X15;
1178 case 16: return OFFB_X16;
1179 case 17: return OFFB_X17;
1180 case 18: return OFFB_X18;
1181 case 19: return OFFB_X19;
1182 case 20: return OFFB_X20;
1183 case 21: return OFFB_X21;
1184 case 22: return OFFB_X22;
1185 case 23: return OFFB_X23;
1186 case 24: return OFFB_X24;
1187 case 25: return OFFB_X25;
1188 case 26: return OFFB_X26;
1189 case 27: return OFFB_X27;
1190 case 28: return OFFB_X28;
1191 case 29: return OFFB_X29;
1192 case 30: return OFFB_X30;
1193 /* but not 31 */
1194 default: vassert(0);
1195 }
1196 }
1197
1198 static Int offsetIReg64orSP ( UInt iregNo )
1199 {
1200 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1201 }
1202
1203 static const HChar* nameIReg64orZR ( UInt iregNo )
1204 {
1205 vassert(iregNo < 32);
1206 static const HChar* names[32]
1207 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1208 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1209 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1210 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1211 return names[iregNo];
1212 }
1213
1214 static const HChar* nameIReg64orSP ( UInt iregNo )
1215 {
1216 if (iregNo == 31) {
1217 return "sp";
1218 }
1219 vassert(iregNo < 31);
1220 return nameIReg64orZR(iregNo);
1221 }
1222
1223 static IRExpr* getIReg64orSP ( UInt iregNo )
1224 {
1225 vassert(iregNo < 32);
1226 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1227 }
1228
1229 static IRExpr* getIReg64orZR ( UInt iregNo )
1230 {
1231 if (iregNo == 31) {
1232 return mkU64(0);
1233 }
1234 vassert(iregNo < 31);
1235 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1236 }
1237
1238 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1239 {
1240 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1241 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1242 }
1243
1244 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1245 {
1246 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1247 if (iregNo == 31) {
1248 return;
1249 }
1250 vassert(iregNo < 31);
1251 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1252 }
1253
1254 static const HChar* nameIReg32orZR ( UInt iregNo )
1255 {
1256 vassert(iregNo < 32);
1257 static const HChar* names[32]
1258 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1259 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1260 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1261 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1262 return names[iregNo];
1263 }
1264
1265 static const HChar* nameIReg32orSP ( UInt iregNo )
1266 {
1267 if (iregNo == 31) {
1268 return "wsp";
1269 }
1270 vassert(iregNo < 31);
1271 return nameIReg32orZR(iregNo);
1272 }
1273
1274 static IRExpr* getIReg32orSP ( UInt iregNo )
1275 {
1276 vassert(iregNo < 32);
1277 return unop(Iop_64to32,
1278 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1279 }
1280
1281 static IRExpr* getIReg32orZR ( UInt iregNo )
1282 {
1283 if (iregNo == 31) {
1284 return mkU32(0);
1285 }
1286 vassert(iregNo < 31);
1287 return unop(Iop_64to32,
1288 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1289 }
1290
1291 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1292 {
1293 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1294 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1295 }
1296
1297 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1298 {
1299 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1300 if (iregNo == 31) {
1301 return;
1302 }
1303 vassert(iregNo < 31);
1304 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1305 }
1306
1307 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1308 {
1309 vassert(is64 == True || is64 == False);
1310 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1311 }
1312
1313 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1314 {
1315 vassert(is64 == True || is64 == False);
1316 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1317 }
1318
1319 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1320 {
1321 vassert(is64 == True || is64 == False);
1322 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1323 }
1324
1325 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1326 {
1327 vassert(is64 == True || is64 == False);
1328 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1329 }
1330
1331 static void putPC ( IRExpr* e )
1332 {
1333 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1334 stmt( IRStmt_Put(OFFB_PC, e) );
1335 }
1336
1337
1338 /* ---------------- Vector (Q) registers ---------------- */
1339
1340 static Int offsetQReg128 ( UInt qregNo )
1341 {
1342 /* We don't care about endianness at this point. It only becomes
1343 relevant when dealing with sections of these registers.*/
1344 switch (qregNo) {
1345 case 0: return OFFB_Q0;
1346 case 1: return OFFB_Q1;
1347 case 2: return OFFB_Q2;
1348 case 3: return OFFB_Q3;
1349 case 4: return OFFB_Q4;
1350 case 5: return OFFB_Q5;
1351 case 6: return OFFB_Q6;
1352 case 7: return OFFB_Q7;
1353 case 8: return OFFB_Q8;
1354 case 9: return OFFB_Q9;
1355 case 10: return OFFB_Q10;
1356 case 11: return OFFB_Q11;
1357 case 12: return OFFB_Q12;
1358 case 13: return OFFB_Q13;
1359 case 14: return OFFB_Q14;
1360 case 15: return OFFB_Q15;
1361 case 16: return OFFB_Q16;
1362 case 17: return OFFB_Q17;
1363 case 18: return OFFB_Q18;
1364 case 19: return OFFB_Q19;
1365 case 20: return OFFB_Q20;
1366 case 21: return OFFB_Q21;
1367 case 22: return OFFB_Q22;
1368 case 23: return OFFB_Q23;
1369 case 24: return OFFB_Q24;
1370 case 25: return OFFB_Q25;
1371 case 26: return OFFB_Q26;
1372 case 27: return OFFB_Q27;
1373 case 28: return OFFB_Q28;
1374 case 29: return OFFB_Q29;
1375 case 30: return OFFB_Q30;
1376 case 31: return OFFB_Q31;
1377 default: vassert(0);
1378 }
1379 }
1380
1381 /* Write to a complete Qreg. */
1382 static void putQReg128 ( UInt qregNo, IRExpr* e )
1383 {
1384 vassert(qregNo < 32);
1385 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1386 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1387 }
1388
1389 /* Read a complete Qreg. */
1390 static IRExpr* getQReg128 ( UInt qregNo )
1391 {
1392 vassert(qregNo < 32);
1393 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1394 }
1395
1396 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1397 bit sub-parts we can choose either integer or float types, and
1398 choose float on the basis that that is the common use case and so
1399 will give least interference with Put-to-Get forwarding later
1400 on. */
1401 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1402 {
1403 switch (szB) {
1404 case 1: return Ity_I8;
1405 case 2: return Ity_I16;
1406 case 4: return Ity_I32; //Ity_F32;
1407 case 8: return Ity_F64;
1408 case 16: return Ity_V128;
1409 default: vassert(0);
1410 }
1411 }
1412
1413 /* Find the offset of the laneNo'th lane of type laneTy in the given
1414 Qreg. Since the host is little-endian, the least significant lane
1415 has the lowest offset. */
1416 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1417 {
1418 vassert(host_endness == VexEndnessLE);
1419 Int base = offsetQReg128(qregNo);
1420 /* Since the host is little-endian, the least significant lane
1421 will be at the lowest address. */
1422 /* Restrict this to known types, so as to avoid silently accepting
1423 stupid types. */
1424 UInt laneSzB = 0;
1425 switch (laneTy) {
1426 case Ity_I8: laneSzB = 1; break;
1427 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1428 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1429 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1430 case Ity_V128: laneSzB = 16; break;
1431 default: break;
1432 }
1433 vassert(laneSzB > 0);
1434 UInt minOff = laneNo * laneSzB;
1435 UInt maxOff = minOff + laneSzB - 1;
1436 vassert(maxOff < 16);
1437 return base + minOff;
1438 }
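/* Example: lane 2 of Ity_I32 in q5 is at offsetQReg128(5) + 8, since the
   host is little-endian. */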
1439
1440 /* Put to the least significant lane of a Qreg. */
1441 static void putQRegLO ( UInt qregNo, IRExpr* e )
1442 {
1443 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1444 Int off = offsetQRegLane(qregNo, ty, 0);
1445 switch (ty) {
1446 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1447 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1448 break;
1449 default:
1450 vassert(0); // Other cases are probably invalid
1451 }
1452 stmt(IRStmt_Put(off, e));
1453 }
1454
1455 /* Get from the least significant lane of a Qreg. */
1456 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1457 {
1458 Int off = offsetQRegLane(qregNo, ty, 0);
1459 switch (ty) {
1460 case Ity_I8:
1461 case Ity_F16: case Ity_I16:
1462 case Ity_I32: case Ity_I64:
1463 case Ity_F32: case Ity_F64: case Ity_V128:
1464 break;
1465 default:
1466 vassert(0); // Other cases are ATC
1467 }
1468 return IRExpr_Get(off, ty);
1469 }
1470
1471 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1472 {
1473 static const HChar* namesQ[32]
1474 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1475 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1476 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1477 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1478 static const HChar* namesD[32]
1479 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1480 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1481 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1482 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1483 static const HChar* namesS[32]
1484 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1485 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1486 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1487 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1488 static const HChar* namesH[32]
1489 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1490 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1491 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1492 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1493 static const HChar* namesB[32]
1494 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1495 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1496 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1497 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1498 vassert(qregNo < 32);
1499 switch (sizeofIRType(laneTy)) {
1500 case 1: return namesB[qregNo];
1501 case 2: return namesH[qregNo];
1502 case 4: return namesS[qregNo];
1503 case 8: return namesD[qregNo];
1504 case 16: return namesQ[qregNo];
1505 default: vassert(0);
1506 }
1507 /*NOTREACHED*/
1508 }
1509
1510 static const HChar* nameQReg128 ( UInt qregNo )
1511 {
1512 return nameQRegLO(qregNo, Ity_V128);
1513 }
1514
1515 /* Find the offset of the most significant half (8 bytes) of the given
1516 Qreg. This requires knowing the endianness of the host. */
1517 static Int offsetQRegHI64 ( UInt qregNo )
1518 {
1519 return offsetQRegLane(qregNo, Ity_I64, 1);
1520 }
1521
1522 static IRExpr* getQRegHI64 ( UInt qregNo )
1523 {
1524 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1525 }
1526
1527 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1528 {
1529 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1530 Int off = offsetQRegHI64(qregNo);
1531 switch (ty) {
1532 case Ity_I64: case Ity_F64:
1533 break;
1534 default:
1535 vassert(0); // Other cases are plain wrong
1536 }
1537 stmt(IRStmt_Put(off, e));
1538 }
1539
1540 /* Put to a specified lane of a Qreg. */
1541 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1542 {
1543 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1544 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1545 switch (laneTy) {
1546 case Ity_F64: case Ity_I64:
1547 case Ity_I32: case Ity_F32:
1548 case Ity_I16: case Ity_F16:
1549 case Ity_I8:
1550 break;
1551 default:
1552 vassert(0); // Other cases are ATC
1553 }
1554 stmt(IRStmt_Put(off, e));
1555 }
1556
1557 /* Get from a specified lane of a Qreg. */
1558 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1559 {
1560 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1561 switch (laneTy) {
1562 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1563 case Ity_F64: case Ity_F32: case Ity_F16:
1564 break;
1565 default:
1566 vassert(0); // Other cases are ATC
1567 }
1568 return IRExpr_Get(off, laneTy);
1569 }
1570
1571
1572 //ZZ /* ---------------- Misc registers ---------------- */
1573 //ZZ
1574 //ZZ static void putMiscReg32 ( UInt gsoffset,
1575 //ZZ IRExpr* e, /* :: Ity_I32 */
1576 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1577 //ZZ {
1578 //ZZ switch (gsoffset) {
1579 //ZZ case OFFB_FPSCR: break;
1580 //ZZ case OFFB_QFLAG32: break;
1581 //ZZ case OFFB_GEFLAG0: break;
1582 //ZZ case OFFB_GEFLAG1: break;
1583 //ZZ case OFFB_GEFLAG2: break;
1584 //ZZ case OFFB_GEFLAG3: break;
1585 //ZZ default: vassert(0); /* awaiting more cases */
1586 //ZZ }
1587 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1588 //ZZ
1589 //ZZ if (guardT == IRTemp_INVALID) {
1590 //ZZ /* unconditional write */
1591 //ZZ stmt(IRStmt_Put(gsoffset, e));
1592 //ZZ } else {
1593 //ZZ stmt(IRStmt_Put(
1594 //ZZ gsoffset,
1595 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1596 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1597 //ZZ ));
1598 //ZZ }
1599 //ZZ }
1600 //ZZ
1601 //ZZ static IRTemp get_ITSTATE ( void )
1602 //ZZ {
1603 //ZZ ASSERT_IS_THUMB;
1604 //ZZ IRTemp t = newTemp(Ity_I32);
1605 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1606 //ZZ return t;
1607 //ZZ }
1608 //ZZ
1609 //ZZ static void put_ITSTATE ( IRTemp t )
1610 //ZZ {
1611 //ZZ ASSERT_IS_THUMB;
1612 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1613 //ZZ }
1614 //ZZ
1615 //ZZ static IRTemp get_QFLAG32 ( void )
1616 //ZZ {
1617 //ZZ IRTemp t = newTemp(Ity_I32);
1618 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1619 //ZZ return t;
1620 //ZZ }
1621 //ZZ
1622 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1623 //ZZ {
1624 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1625 //ZZ }
1626 //ZZ
1627 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1628 //ZZ Status Register) to indicate that overflow or saturation occurred.
1629 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1630 //ZZ value to indicate saturation. */
1631 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1632 //ZZ {
1633 //ZZ IRTemp old = get_QFLAG32();
1634 //ZZ IRTemp nyu = newTemp(Ity_I32);
1635 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1636 //ZZ put_QFLAG32(nyu, condT);
1637 //ZZ }
1638
1639
1640 /* ---------------- FPCR stuff ---------------- */
1641
1642 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1643 convert them to IR format. Bind the final result to the
1644 returned temp. */
1645 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1646 {
1647 /* The ARMvfp encoding for rounding mode bits is:
1648 00 to nearest
1649 01 to +infinity
1650 10 to -infinity
1651 11 to zero
1652 We need to convert that to the IR encoding:
1653 00 to nearest (the default)
1654 10 to +infinity
1655 01 to -infinity
1656 11 to zero
1657 Which can be done by swapping bits 0 and 1.
1658 The rmode bits are at 23:22 in FPCR.
1659 */
1660 IRTemp armEncd = newTemp(Ity_I32);
1661 IRTemp swapped = newTemp(Ity_I32);
1662 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1663 we don't zero out bits 24 and above, since the assignment to
1664 'swapped' will mask them out anyway. */
1665 assign(armEncd,
1666 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1667 /* Now swap them. */
1668 assign(swapped,
1669 binop(Iop_Or32,
1670 binop(Iop_And32,
1671 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1672 mkU32(2)),
1673 binop(Iop_And32,
1674 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1675 mkU32(1))
1676 ));
1677 return swapped;
1678 }
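/* Example: FPCR.RMode == 01 ("towards +infinity") comes out as IR
   rounding mode 10 (Irrm_PosINF) after the bit swap above. */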
1679
1680
1681 /*------------------------------------------------------------*/
1682 /*--- Helpers for flag handling and conditional insns ---*/
1683 /*------------------------------------------------------------*/
1684
1685 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1686 {
1687 switch (cond) {
1688 case ARM64CondEQ: return "eq";
1689 case ARM64CondNE: return "ne";
1690 case ARM64CondCS: return "cs"; // or 'hs'
1691 case ARM64CondCC: return "cc"; // or 'lo'
1692 case ARM64CondMI: return "mi";
1693 case ARM64CondPL: return "pl";
1694 case ARM64CondVS: return "vs";
1695 case ARM64CondVC: return "vc";
1696 case ARM64CondHI: return "hi";
1697 case ARM64CondLS: return "ls";
1698 case ARM64CondGE: return "ge";
1699 case ARM64CondLT: return "lt";
1700 case ARM64CondGT: return "gt";
1701 case ARM64CondLE: return "le";
1702 case ARM64CondAL: return "al";
1703 case ARM64CondNV: return "nv";
1704 default: vpanic("nameARM64Condcode");
1705 }
1706 }
1707
1708 /* and a handy shorthand for it */
1709 static const HChar* nameCC ( ARM64Condcode cond ) {
1710 return nameARM64Condcode(cond);
1711 }
1712
1713
1714 /* Build IR to calculate some particular condition from stored
1715 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1716 Ity_I64, suitable for narrowing. Although the return type is
1717 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1718 :: Ity_I64 and must denote the condition to compute in
1719 bits 7:4, and be zero everywhere else.
1720 */
1721 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1722 {
1723 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1724 /* And 'cond' had better produce a value in which only bits 7:4 are
1725 nonzero. However, obviously we can't assert for that. */
1726
1727 /* So what we're constructing for the first argument is
1728 "(cond << 4) | stored-operation".
1729 However, as per comments above, 'cond' must be supplied
1730 pre-shifted to this function.
1731
1732 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1733 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1734 8 bits of the first argument. */
1735 IRExpr** args
1736 = mkIRExprVec_4(
1737 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1738 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1739 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1740 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1741 );
1742 IRExpr* call
1743 = mkIRExprCCall(
1744 Ity_I64,
1745 0/*regparm*/,
1746 "arm64g_calculate_condition", &arm64g_calculate_condition,
1747 args
1748 );
1749
1750 /* Exclude the requested condition, OP and NDEP from definedness
1751 checking. We're only interested in DEP1 and DEP2. */
1752 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1753 return call;
1754 }
1755
1756
1757 /* Build IR to calculate some particular condition from stored
1758 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1759 Ity_I64, suitable for narrowing. Although the return type is
1760 Ity_I64, the returned value is either 0 or 1.
1761 */
1762 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1763 {
1764 /* First arg is "(cond << 4) | stored-operation". This requires that the
1765 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1766 (COND, OP) pair in the lowest 8 bits of the first argument. */
1767 vassert(cond >= 0 && cond <= 15);
1768 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1769 }
1770
1771
1772 /* Build IR to calculate just the carry flag from stored
1773 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1774 Ity_I64. */
1775 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1776 {
1777 IRExpr** args
1778 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1779 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1780 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1781 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1782 IRExpr* call
1783 = mkIRExprCCall(
1784 Ity_I64,
1785 0/*regparm*/,
1786 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1787 args
1788 );
1789 /* Exclude OP and NDEP from definedness checking. We're only
1790 interested in DEP1 and DEP2. */
1791 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1792 return call;
1793 }
1794
1795
1796 //ZZ /* Build IR to calculate just the overflow flag from stored
1797 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1798 //ZZ Ity_I32. */
1799 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1800 //ZZ {
1801 //ZZ IRExpr** args
1802 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1803 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1804 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1805 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1806 //ZZ IRExpr* call
1807 //ZZ = mkIRExprCCall(
1808 //ZZ Ity_I32,
1809 //ZZ 0/*regparm*/,
1810 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1811 //ZZ args
1812 //ZZ );
1813 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1814 //ZZ interested in DEP1 and DEP2. */
1815 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1816 //ZZ return call;
1817 //ZZ }
1818
1819
1820 /* Build IR to calculate N Z C V in bits 31:28 of the
1821 returned word. */
1822 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1823 {
1824 IRExpr** args
1825 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1826 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1827 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1828 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1829 IRExpr* call
1830 = mkIRExprCCall(
1831 Ity_I64,
1832 0/*regparm*/,
1833 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1834 args
1835 );
1836 /* Exclude OP and NDEP from definedness checking. We're only
1837 interested in DEP1 and DEP2. */
1838 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1839 return call;
1840 }
1841
1842
1843 /* Build IR to set the flags thunk, in the most general case. */
1844 static
1845 void setFlags_D1_D2_ND ( UInt cc_op,
1846 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1847 {
1848 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1849 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1850 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1851 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1852 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1853 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1854 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1855 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1856 }
1857
1858 /* Build IR to set the flags thunk after ADD or SUB. */
1859 static
1860 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1861 {
1862 IRTemp argL64 = IRTemp_INVALID;
1863 IRTemp argR64 = IRTemp_INVALID;
1864 IRTemp z64 = newTemp(Ity_I64);
1865 if (is64) {
1866 argL64 = argL;
1867 argR64 = argR;
1868 } else {
1869 argL64 = newTemp(Ity_I64);
1870 argR64 = newTemp(Ity_I64);
1871 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1872 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1873 }
1874 assign(z64, mkU64(0));
1875 UInt cc_op = ARM64G_CC_OP_NUMBER;
1876 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1877 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1878 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1879 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1880 else { vassert(0); }
1881 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1882 }
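
/* So, for example, a 32-bit ADDS leaves the thunk holding
      CC_OP   = ARM64G_CC_OP_ADD32
      CC_DEP1 = the left operand, zero-extended to 64 bits
      CC_DEP2 = the right operand, zero-extended to 64 bits
      CC_NDEP = 0
   and the actual NZCV bits are only computed later, by one of the
   mk_arm64g_calculate_* helpers above, if and when they are needed. */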
1883
1884 /* Build IR to set the flags thunk after ADC or SBC. */
1885 static
1886 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1887 IRTemp argL, IRTemp argR, IRTemp oldC )
1888 {
1889 IRTemp argL64 = IRTemp_INVALID;
1890 IRTemp argR64 = IRTemp_INVALID;
1891 IRTemp oldC64 = IRTemp_INVALID;
1892 if (is64) {
1893 argL64 = argL;
1894 argR64 = argR;
1895 oldC64 = oldC;
1896 } else {
1897 argL64 = newTemp(Ity_I64);
1898 argR64 = newTemp(Ity_I64);
1899 oldC64 = newTemp(Ity_I64);
1900 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1901 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1902 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1903 }
1904 UInt cc_op = ARM64G_CC_OP_NUMBER;
1905 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1906 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1907 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1908 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1909 else { vassert(0); }
1910 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1911 }
1912
1913 /* Build IR to set the flags thunk after ADD or SUB, if the given
1914 condition evaluates to True at run time. If not, the flags are set
1915 to the specified NZCV value. */
1916 static
1917 void setFlags_ADD_SUB_conditionally (
1918 Bool is64, Bool isSUB,
1919 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1920 )
1921 {
1922 /* Generate IR as follows:
1923 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1924 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1925 CC_DEP2 = ITE(cond, argR64, 0)
1926 CC_NDEP = 0
1927 */
1928
1929 IRTemp z64 = newTemp(Ity_I64);
1930 assign(z64, mkU64(0));
1931
1932 /* Establish the operation and operands for the True case. */
1933 IRTemp t_dep1 = IRTemp_INVALID;
1934 IRTemp t_dep2 = IRTemp_INVALID;
1935 UInt t_op = ARM64G_CC_OP_NUMBER;
1936 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1937 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1938 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1939 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1940 else { vassert(0); }
1941 /* */
1942 if (is64) {
1943 t_dep1 = argL;
1944 t_dep2 = argR;
1945 } else {
1946 t_dep1 = newTemp(Ity_I64);
1947 t_dep2 = newTemp(Ity_I64);
1948 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1949 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1950 }
1951
1952 /* Establish the operation and operands for the False case. */
1953 IRTemp f_dep1 = newTemp(Ity_I64);
1954 IRTemp f_dep2 = z64;
1955 UInt f_op = ARM64G_CC_OP_COPY;
1956 assign(f_dep1, mkU64(nzcv << 28));
1957
1958 /* Final thunk values */
1959 IRTemp dep1 = newTemp(Ity_I64);
1960 IRTemp dep2 = newTemp(Ity_I64);
1961 IRTemp op = newTemp(Ity_I64);
1962
1963 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1964 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1965 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1966
1967 /* finally .. */
1968 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1969 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1970 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1971 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1972 }
1973
1974 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1975 static
1976 void setFlags_LOGIC ( Bool is64, IRTemp res )
1977 {
1978 IRTemp res64 = IRTemp_INVALID;
1979 IRTemp z64 = newTemp(Ity_I64);
1980 UInt cc_op = ARM64G_CC_OP_NUMBER;
1981 if (is64) {
1982 res64 = res;
1983 cc_op = ARM64G_CC_OP_LOGIC64;
1984 } else {
1985 res64 = newTemp(Ity_I64);
1986 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1987 cc_op = ARM64G_CC_OP_LOGIC32;
1988 }
1989 assign(z64, mkU64(0));
1990 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1991 }
1992
1993 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
1994 located in bits 31:28 of the supplied value. */
1995 static
1996 void setFlags_COPY ( IRTemp nzcv_28x0 )
1997 {
1998 IRTemp z64 = newTemp(Ity_I64);
1999 assign(z64, mkU64(0));
2000 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
2001 }
2002
2003
2004 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2005 //ZZ sets it at all) */
2006 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2007 //ZZ IRTemp t_dep2,
2008 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2009 //ZZ {
2010 //ZZ IRTemp z32 = newTemp(Ity_I32);
2011 //ZZ assign( z32, mkU32(0) );
2012 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2013 //ZZ }
2014 //ZZ
2015 //ZZ
2016 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2017 //ZZ sets it at all) */
2018 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2019 //ZZ IRTemp t_ndep,
2020 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2021 //ZZ {
2022 //ZZ IRTemp z32 = newTemp(Ity_I32);
2023 //ZZ assign( z32, mkU32(0) );
2024 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2025 //ZZ }
2026 //ZZ
2027 //ZZ
2028 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2029 //ZZ sets them at all) */
2030 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2031 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2032 //ZZ {
2033 //ZZ IRTemp z32 = newTemp(Ity_I32);
2034 //ZZ assign( z32, mkU32(0) );
2035 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2036 //ZZ }
2037
2038
2039 /*------------------------------------------------------------*/
2040 /*--- Misc math helpers ---*/
2041 /*------------------------------------------------------------*/
2042
2043 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2044 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2045 {
2046 IRTemp maskT = newTemp(Ity_I64);
2047 IRTemp res = newTemp(Ity_I64);
2048 vassert(sh >= 1 && sh <= 63);
2049 assign(maskT, mkU64(mask));
2050 assign( res,
2051 binop(Iop_Or64,
2052 binop(Iop_Shr64,
2053 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2054 mkU8(sh)),
2055 binop(Iop_And64,
2056 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2057 mkexpr(maskT))
2058 )
2059 );
2060 return res;
2061 }
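
/* As a concrete example of the expression above, with
   mask = 0xFF00FF00FF00FF00 and sh = 8 it swaps each pair of
   adjacent bytes:
      x               = 0x1122334455667788
      (x & mask) >> 8 = 0x0011003300550077
      (x << 8) & mask = 0x2200440066008800
      OR of the two   = 0x2211443366558877
   The swap functions below are built by composing such steps. */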
2062
2063 /* Generates byte swaps within 32-bit lanes. */
2064 static IRTemp math_UINTSWAP64 ( IRTemp src )
2065 {
2066 IRTemp res;
2067 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2068 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2069 return res;
2070 }
2071
2072 /* Generates byte swaps within 16-bit lanes. */
2073 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2074 {
2075 IRTemp res;
2076 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2077 return res;
2078 }
2079
2080 /* Generates a 64-bit byte swap. */
2081 static IRTemp math_BYTESWAP64 ( IRTemp src )
2082 {
2083 IRTemp res;
2084 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2085 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2086 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2087 return res;
2088 }
2089
2090 /* Generates a 64-bit bit swap. */
2091 static IRTemp math_BITSWAP64 ( IRTemp src )
2092 {
2093 IRTemp res;
2094 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2095 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2096 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2097 return math_BYTESWAP64(res);
2098 }
2099
2100 /* Duplicates the bits at the bottom of the given word to fill the
2101 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2102 except for the bottom bits. */
2103 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2104 {
2105 if (srcTy == Ity_I8) {
2106 IRTemp t16 = newTemp(Ity_I64);
2107 assign(t16, binop(Iop_Or64, mkexpr(src),
2108 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2109 IRTemp t32 = newTemp(Ity_I64);
2110 assign(t32, binop(Iop_Or64, mkexpr(t16),
2111 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2112 IRTemp t64 = newTemp(Ity_I64);
2113 assign(t64, binop(Iop_Or64, mkexpr(t32),
2114 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2115 return t64;
2116 }
2117 if (srcTy == Ity_I16) {
2118 IRTemp t32 = newTemp(Ity_I64);
2119 assign(t32, binop(Iop_Or64, mkexpr(src),
2120 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2121 IRTemp t64 = newTemp(Ity_I64);
2122 assign(t64, binop(Iop_Or64, mkexpr(t32),
2123 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2124 return t64;
2125 }
2126 if (srcTy == Ity_I32) {
2127 IRTemp t64 = newTemp(Ity_I64);
2128 assign(t64, binop(Iop_Or64, mkexpr(src),
2129 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2130 return t64;
2131 }
2132 if (srcTy == Ity_I64) {
2133 return src;
2134 }
2135 vassert(0);
2136 }
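
/* For example, with srcTy == Ity_I8 and src = 0x00000000000000AB,
   the three doubling steps above produce
      t16 = 0x000000000000ABAB
      t32 = 0x00000000ABABABAB
      t64 = 0xABABABABABABABAB
   that is, the 8-bit lane replicated across the whole 64-bit word. */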
2137
2138
2139 /* Duplicates the src element exactly so as to fill a V128 value. */
2140 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2141 {
2142 IRTemp res = newTempV128();
2143 if (srcTy == Ity_F64) {
2144 IRTemp i64 = newTemp(Ity_I64);
2145 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2146 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2147 return res;
2148 }
2149 if (srcTy == Ity_F32) {
2150 IRTemp i64a = newTemp(Ity_I64);
2151 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2152 IRTemp i64b = newTemp(Ity_I64);
2153 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2154 mkexpr(i64a)));
2155 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2156 return res;
2157 }
2158 if (srcTy == Ity_I64) {
2159 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2160 return res;
2161 }
2162 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2163 IRTemp t1 = newTemp(Ity_I64);
2164 assign(t1, widenUto64(srcTy, mkexpr(src)));
2165 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2166 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2167 return res;
2168 }
2169 vassert(0);
2170 }
2171
2172
2173 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2174 zero out the upper half. */
2175 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2176 {
2177 if (bitQ == 1) return mkexpr(fullWidth);
2178 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2179 vassert(0);
2180 }
2181
2182 /* The same, but from an expression instead. */
2183 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2184 {
2185 IRTemp fullWidthT = newTempV128();
2186 assign(fullWidthT, fullWidth);
2187 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2188 }
2189
2190
2191 /*------------------------------------------------------------*/
2192 /*--- FP comparison helpers ---*/
2193 /*------------------------------------------------------------*/
2194
2195 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2196 as an IRCmpF64Result. Generate code to convert it to an
2197 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2198 Assign a new temp to hold that value, and return the temp. */
2199 static
2200 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2201 {
2202 IRTemp ix = newTemp(Ity_I64);
2203 IRTemp termL = newTemp(Ity_I64);
2204 IRTemp termR = newTemp(Ity_I64);
2205 IRTemp nzcv = newTemp(Ity_I64);
2206 IRTemp irRes = newTemp(Ity_I64);
2207
2208 /* This is where the fun starts. We have to convert 'irRes' from
2209 an IR-convention return result (IRCmpF64Result) to an
2210 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2211 4 bits of 'nzcv'. */
2212 /* Map compare result from IR to ARM(nzcv) */
2213 /*
2214 FP cmp result | IR | ARM(nzcv)
2215 --------------------------------
2216 UN 0x45 0011
2217 LT 0x01 1000
2218 GT 0x00 0010
2219 EQ 0x40 0110
2220 */
2221 /* Now since you're probably wondering WTF ..
2222
2223 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2224 places them side by side, giving a number which is 0, 1, 2 or 3.
2225
2226 termL is a sequence cooked up by GNU superopt. It converts ix
2227 into an almost correct NZCV value (incredibly), except
2228 for the case of UN, where it produces 0100 instead of the
2229 required 0011.
2230
2231 termR is therefore a correction term, also computed from ix. It
2232 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2233 the final correct value, we subtract termR from termL.
2234
2235 Don't take my word for it. There's a test program at the bottom
2236 of guest_arm_toIR.c, to try this out with.
2237 */
2238 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2239
2240 assign(
2241 ix,
2242 binop(Iop_Or64,
2243 binop(Iop_And64,
2244 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2245 mkU64(3)),
2246 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2247
2248 assign(
2249 termL,
2250 binop(Iop_Add64,
2251 binop(Iop_Shr64,
2252 binop(Iop_Sub64,
2253 binop(Iop_Shl64,
2254 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2255 mkU8(62)),
2256 mkU64(1)),
2257 mkU8(61)),
2258 mkU64(1)));
2259
2260 assign(
2261 termR,
2262 binop(Iop_And64,
2263 binop(Iop_And64,
2264 mkexpr(ix),
2265 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2266 mkU64(1)));
2267
2268 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2269 return nzcv;
2270 }
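
/* The superopt sequence is easiest to trust by evaluating it on the
   four possible 'ix' values. A minimal host-side sketch of the same
   arithmetic (illustrative only; this helper is not part of the
   translation):

      static UInt ix_to_nzcv_sketch ( ULong irRes )
      {
         ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
         ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
         ULong termR = ix & (ix >> 1) & 1;
         return (UInt)(termL - termR);   // NZCV in bits 3:0
      }

   Feeding it 0x45 (UN), 0x01 (LT), 0x00 (GT) and 0x40 (EQ) yields
   0011, 1000, 0010 and 0110 respectively, matching the table above. */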
2271
2272
2273 /*------------------------------------------------------------*/
2274 /*--- Data processing (immediate) ---*/
2275 /*------------------------------------------------------------*/
2276
2277 /* Helper functions for supporting "DecodeBitMasks" */
2278
2279 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2280 {
2281 vassert(width > 0 && width <= 64);
2282 vassert(rot >= 0 && rot < width);
2283 if (rot == 0) return x;
2284 ULong res = x >> rot;
2285 res |= (x << (width - rot));
2286 if (width < 64)
2287 res &= ((1ULL << width) - 1);
2288 return res;
2289 }
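
/* For instance dbm_ROR(8, 0x17, 2) rotates 00010111 right by two
   places within an 8-bit field, giving 11000101 (0xC5); the final
   masking keeps the result within 'width' bits when width < 64. */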
2290
2291 static ULong dbm_RepTo64( Int esize, ULong x )
2292 {
2293 switch (esize) {
2294 case 64:
2295 return x;
2296 case 32:
2297 x &= 0xFFFFFFFF; x |= (x << 32);
2298 return x;
2299 case 16:
2300 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2301 return x;
2302 case 8:
2303 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2304 return x;
2305 case 4:
2306 x &= 0xF; x |= (x << 4); x |= (x << 8);
2307 x |= (x << 16); x |= (x << 32);
2308 return x;
2309 case 2:
2310 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2311 x |= (x << 16); x |= (x << 32);
2312 return x;
2313 default:
2314 break;
2315 }
2316 vpanic("dbm_RepTo64");
2317 /*NOTREACHED*/
2318 return 0;
2319 }
2320
2321 static Int dbm_highestSetBit ( ULong x )
2322 {
2323 Int i;
2324 for (i = 63; i >= 0; i--) {
2325 if (x & (1ULL << i))
2326 return i;
2327 }
2328 vassert(x == 0);
2329 return -1;
2330 }
2331
2332 static
2333 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2334 ULong immN, ULong imms, ULong immr, Bool immediate,
2335 UInt M /*32 or 64*/)
2336 {
2337 vassert(immN < (1ULL << 1));
2338 vassert(imms < (1ULL << 6));
2339 vassert(immr < (1ULL << 6));
2340 vassert(immediate == False || immediate == True);
2341 vassert(M == 32 || M == 64);
2342
2343 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2344 if (len < 1) { /* printf("fail1\n"); */ return False; }
2345 vassert(len <= 6);
2346 vassert(M >= (1 << len));
2347
2348 vassert(len >= 1 && len <= 6);
2349 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2350 (1 << len) - 1;
2351 vassert(levels >= 1 && levels <= 63);
2352
2353 if (immediate && ((imms & levels) == levels)) {
2354 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2355 return False;
2356 }
2357
2358 ULong S = imms & levels;
2359 ULong R = immr & levels;
2360 Int diff = S - R;
2361 diff &= 63;
2362 Int esize = 1 << len;
2363 vassert(2 <= esize && esize <= 64);
2364
2365 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2366 same below with d. S can be 63 in which case we have an out of
2367 range and hence undefined shift. */
2368 vassert(S >= 0 && S <= 63);
2369 vassert(esize >= (S+1));
2370 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2371 //(1ULL << (S+1)) - 1;
2372 ((1ULL << S) - 1) + (1ULL << S);
2373
2374 Int d = // diff<len-1:0>
2375 diff & ((1 << len)-1);
2376 vassert(esize >= (d+1));
2377 vassert(d >= 0 && d <= 63);
2378
2379 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2380 //(1ULL << (d+1)) - 1;
2381 ((1ULL << d) - 1) + (1ULL << d);
2382
2383 if (esize != 64) vassert(elem_s < (1ULL << esize));
2384 if (esize != 64) vassert(elem_d < (1ULL << esize));
2385
2386 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2387 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2388
2389 return True;
2390 }
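
/* A worked example of the decode above, for the immediate form with
   N = 0, immr = 0, imms = 000111 (binary):
      len    = highestSetBit(111000) = 5, so esize = 32, levels = 31
      S = 7, R = 0, diff = 7, d = 7
      elem_s = Ones(S+1) = 0xFF
      elem_d = Ones(d+1) = 0xFF
      wmask  = Replicate(ROR(0xFF, 0)) = 0x000000FF000000FF
      tmask  = Replicate(0xFF)         = 0x000000FF000000FF
   that is, an 8-bit run of ones replicated across 32-bit elements. */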
2391
2392
2393 static
2394 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2395 UInt insn)
2396 {
2397 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2398
2399 /* insn[28:23]
2400 10000x PC-rel addressing
2401 10001x Add/subtract (immediate)
2402 100100 Logical (immediate)
2403 100101 Move Wide (immediate)
2404 100110 Bitfield
2405 100111 Extract
2406 */
2407
2408 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2409 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2410 Bool is64 = INSN(31,31) == 1;
2411 Bool isSub = INSN(30,30) == 1;
2412 Bool setCC = INSN(29,29) == 1;
2413 UInt sh = INSN(23,22);
2414 UInt uimm12 = INSN(21,10);
2415 UInt nn = INSN(9,5);
2416 UInt dd = INSN(4,0);
2417 const HChar* nm = isSub ? "sub" : "add";
2418 if (sh >= 2) {
2419 /* Invalid; fall through */
2420 } else {
2421 vassert(sh <= 1);
2422 uimm12 <<= (12 * sh);
2423 if (is64) {
2424 IRTemp argL = newTemp(Ity_I64);
2425 IRTemp argR = newTemp(Ity_I64);
2426 IRTemp res = newTemp(Ity_I64);
2427 assign(argL, getIReg64orSP(nn));
2428 assign(argR, mkU64(uimm12));
2429 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2430 mkexpr(argL), mkexpr(argR)));
2431 if (setCC) {
2432 putIReg64orZR(dd, mkexpr(res));
2433 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2434 DIP("%ss %s, %s, 0x%x\n",
2435 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2436 } else {
2437 putIReg64orSP(dd, mkexpr(res));
2438 DIP("%s %s, %s, 0x%x\n",
2439 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2440 }
2441 } else {
2442 IRTemp argL = newTemp(Ity_I32);
2443 IRTemp argR = newTemp(Ity_I32);
2444 IRTemp res = newTemp(Ity_I32);
2445 assign(argL, getIReg32orSP(nn));
2446 assign(argR, mkU32(uimm12));
2447 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2448 mkexpr(argL), mkexpr(argR)));
2449 if (setCC) {
2450 putIReg32orZR(dd, mkexpr(res));
2451 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2452 DIP("%ss %s, %s, 0x%x\n",
2453 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2454 } else {
2455 putIReg32orSP(dd, mkexpr(res));
2456 DIP("%s %s, %s, 0x%x\n",
2457 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2458 }
2459 }
2460 return True;
2461 }
2462 }
2463
2464 /* -------------------- ADR/ADRP -------------------- */
2465 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2466 UInt bP = INSN(31,31);
2467 UInt immLo = INSN(30,29);
2468 UInt immHi = INSN(23,5);
2469 UInt rD = INSN(4,0);
2470 ULong uimm = (immHi << 2) | immLo;
2471 ULong simm = sx_to_64(uimm, 21);
2472 ULong val;
2473 if (bP) {
2474 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2475 } else {
2476 val = guest_PC_curr_instr + simm;
2477 }
2478 putIReg64orZR(rD, mkU64(val));
2479 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2480 return True;
2481 }
2482
2483 /* -------------------- LOGIC(imm) -------------------- */
2484 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2485 /* 31 30 28 22 21 15 9 4
2486 sf op 100100 N immr imms Rn Rd
2487 op=00: AND Rd|SP, Rn, #imm
2488 op=01: ORR Rd|SP, Rn, #imm
2489 op=10: EOR Rd|SP, Rn, #imm
2490 op=11: ANDS Rd|ZR, Rn, #imm
2491 */
2492 Bool is64 = INSN(31,31) == 1;
2493 UInt op = INSN(30,29);
2494 UInt N = INSN(22,22);
2495 UInt immR = INSN(21,16);
2496 UInt immS = INSN(15,10);
2497 UInt nn = INSN(9,5);
2498 UInt dd = INSN(4,0);
2499 ULong imm = 0;
2500 Bool ok;
2501 if (N == 1 && !is64)
2502 goto after_logic_imm; /* not allowed; fall through */
2503 ok = dbm_DecodeBitMasks(&imm, NULL,
2504 N, immS, immR, True, is64 ? 64 : 32);
2505 if (!ok)
2506 goto after_logic_imm;
2507
2508 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2509 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2510 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2511
2512 vassert(op < 4);
2513 if (is64) {
2514 IRExpr* argL = getIReg64orZR(nn);
2515 IRExpr* argR = mkU64(imm);
2516 IRTemp res = newTemp(Ity_I64);
2517 assign(res, binop(ops64[op], argL, argR));
2518 if (op < 3) {
2519 putIReg64orSP(dd, mkexpr(res));
2520 DIP("%s %s, %s, 0x%llx\n", names[op],
2521 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2522 } else {
2523 putIReg64orZR(dd, mkexpr(res));
2524 setFlags_LOGIC(True/*is64*/, res);
2525 DIP("%s %s, %s, 0x%llx\n", names[op],
2526 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2527 }
2528 } else {
2529 IRExpr* argL = getIReg32orZR(nn);
2530 IRExpr* argR = mkU32((UInt)imm);
2531 IRTemp res = newTemp(Ity_I32);
2532 assign(res, binop(ops32[op], argL, argR));
2533 if (op < 3) {
2534 putIReg32orSP(dd, mkexpr(res));
2535 DIP("%s %s, %s, 0x%x\n", names[op],
2536 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2537 } else {
2538 putIReg32orZR(dd, mkexpr(res));
2539 setFlags_LOGIC(False/*!is64*/, res);
2540 DIP("%s %s, %s, 0x%x\n", names[op],
2541 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2542 }
2543 }
2544 return True;
2545 }
2546 after_logic_imm:
2547
2548 /* -------------------- MOV{Z,N,K} -------------------- */
2549 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2550 /* 31 30 28 22 20 4
2551 | | | | | |
2552 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2553 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2554 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2555 */
2556 Bool is64 = INSN(31,31) == 1;
2557 UInt subopc = INSN(30,29);
2558 UInt hw = INSN(22,21);
2559 UInt imm16 = INSN(20,5);
2560 UInt dd = INSN(4,0);
2561 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2562 /* invalid; fall through */
2563 } else {
2564 ULong imm64 = ((ULong)imm16) << (16 * hw);
2565 if (!is64)
2566 vassert(imm64 < 0x100000000ULL);
2567 switch (subopc) {
2568 case BITS2(1,0): // MOVZ
2569 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2570 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2571 break;
2572 case BITS2(0,0): // MOVN
2573 imm64 = ~imm64;
2574 if (!is64)
2575 imm64 &= 0xFFFFFFFFULL;
2576 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2577 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2578 break;
2579 case BITS2(1,1): // MOVK
2580 /* This is more complex. We are inserting a slice into
2581 the destination register, so we need to have the old
2582 value of it. */
2583 if (is64) {
2584 IRTemp old = newTemp(Ity_I64);
2585 assign(old, getIReg64orZR(dd));
2586 ULong mask = 0xFFFFULL << (16 * hw);
2587 IRExpr* res
2588 = binop(Iop_Or64,
2589 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2590 mkU64(imm64));
2591 putIReg64orZR(dd, res);
2592 DIP("movk %s, 0x%x, lsl %u\n",
2593 nameIReg64orZR(dd), imm16, 16*hw);
2594 } else {
2595 IRTemp old = newTemp(Ity_I32);
2596 assign(old, getIReg32orZR(dd));
2597 vassert(hw <= 1);
2598 UInt mask = ((UInt)0xFFFF) << (16 * hw);
2599 IRExpr* res
2600 = binop(Iop_Or32,
2601 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2602 mkU32((UInt)imm64));
2603 putIReg32orZR(dd, res);
2604 DIP("movk %s, 0x%x, lsl %u\n",
2605 nameIReg32orZR(dd), imm16, 16*hw);
2606 }
2607 break;
2608 default:
2609 vassert(0);
2610 }
2611 return True;
2612 }
2613 }
2614
2615 /* -------------------- {U,S,}BFM -------------------- */
2616 /* 30 28 22 21 15 9 4
2617
2618 sf 10 100110 N immr imms nn dd
2619 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2620 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2621
2622 sf 00 100110 N immr imms nn dd
2623 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2624 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2625
2626 sf 01 100110 N immr imms nn dd
2627 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2628 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2629 */
2630 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2631 UInt sf = INSN(31,31);
2632 UInt opc = INSN(30,29);
2633 UInt N = INSN(22,22);
2634 UInt immR = INSN(21,16);
2635 UInt immS = INSN(15,10);
2636 UInt nn = INSN(9,5);
2637 UInt dd = INSN(4,0);
2638 Bool inZero = False;
2639 Bool extend = False;
2640 const HChar* nm = "???";
2641 /* skip invalid combinations */
2642 switch (opc) {
2643 case BITS2(0,0):
2644 inZero = True; extend = True; nm = "sbfm"; break;
2645 case BITS2(0,1):
2646 inZero = False; extend = False; nm = "bfm"; break;
2647 case BITS2(1,0):
2648 inZero = True; extend = False; nm = "ubfm"; break;
2649 case BITS2(1,1):
2650 goto after_bfm; /* invalid */
2651 default:
2652 vassert(0);
2653 }
2654 if (sf == 1 && N != 1) goto after_bfm;
2655 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2656 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2657 ULong wmask = 0, tmask = 0;
2658 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2659 N, immS, immR, False, sf == 1 ? 64 : 32);
2660 if (!ok) goto after_bfm; /* hmmm */
2661
2662 Bool is64 = sf == 1;
2663 IRType ty = is64 ? Ity_I64 : Ity_I32;
2664
2665 IRTemp dst = newTemp(ty);
2666 IRTemp src = newTemp(ty);
2667 IRTemp bot = newTemp(ty);
2668 IRTemp top = newTemp(ty);
2669 IRTemp res = newTemp(ty);
2670 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2671 assign(src, getIRegOrZR(is64, nn));
2672 /* perform bitfield move on low bits */
2673 assign(bot, binop(mkOR(ty),
2674 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2675 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2676 mkU(ty, wmask))));
2677 /* determine extension bits (sign, zero or dest register) */
2678 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2679 /* combine extension bits and result bits */
2680 assign(res, binop(mkOR(ty),
2681 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2682 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2683 putIRegOrZR(is64, dd, mkexpr(res));
2684 DIP("%s %s, %s, immR=%u, immS=%u\n",
2685 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2686 return True;
2687 }
2688 after_bfm:
2689
2690 /* ---------------------- EXTR ---------------------- */
2691 /* 30 28 22 20 15 9 4
2692 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2693 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2694 */
2695 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2696 Bool is64 = INSN(31,31) == 1;
2697 UInt mm = INSN(20,16);
2698 UInt imm6 = INSN(15,10);
2699 UInt nn = INSN(9,5);
2700 UInt dd = INSN(4,0);
2701 Bool valid = True;
2702 if (INSN(31,31) != INSN(22,22))
2703 valid = False;
2704 if (!is64 && imm6 >= 32)
2705 valid = False;
2706 if (!valid) goto after_extr;
2707 IRType ty = is64 ? Ity_I64 : Ity_I32;
2708 IRTemp srcHi = newTemp(ty);
2709 IRTemp srcLo = newTemp(ty);
2710 IRTemp res = newTemp(ty);
2711 assign(srcHi, getIRegOrZR(is64, nn));
2712 assign(srcLo, getIRegOrZR(is64, mm));
2713 if (imm6 == 0) {
2714 assign(res, mkexpr(srcLo));
2715 } else {
2716 UInt szBits = 8 * sizeofIRType(ty);
2717 vassert(imm6 > 0 && imm6 < szBits);
2718 assign(res, binop(mkOR(ty),
2719 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2720 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2721 }
2722 putIRegOrZR(is64, dd, mkexpr(res));
2723 DIP("extr %s, %s, %s, #%u\n",
2724 nameIRegOrZR(is64,dd),
2725 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2726 return True;
2727 }
2728 after_extr:
2729
2730 vex_printf("ARM64 front end: data_processing_immediate\n");
2731 return False;
2732 # undef INSN
2733 }
2734
2735
2736 /*------------------------------------------------------------*/
2737 /*--- Data processing (register) instructions ---*/
2738 /*------------------------------------------------------------*/
2739
2740 static const HChar* nameSH ( UInt sh ) {
2741 switch (sh) {
2742 case 0: return "lsl";
2743 case 1: return "lsr";
2744 case 2: return "asr";
2745 case 3: return "ror";
2746 default: vassert(0);
2747 }
2748 }
2749
2750 /* Generate IR to get a register value, possibly shifted by an
2751 immediate. Returns either a 32- or 64-bit temporary holding the
2752 result. After the shift, the value can optionally be NOT-ed
2753 too.
2754
2755 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2756 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2757 isn't allowed, but it's the job of the caller to check that.
2758 */
2759 static IRTemp getShiftedIRegOrZR ( Bool is64,
2760 UInt sh_how, UInt sh_amt, UInt regNo,
2761 Bool invert )
2762 {
2763 vassert(sh_how < 4);
2764 vassert(sh_amt < (is64 ? 64 : 32));
2765 IRType ty = is64 ? Ity_I64 : Ity_I32;
2766 IRTemp t0 = newTemp(ty);
2767 assign(t0, getIRegOrZR(is64, regNo));
2768 IRTemp t1 = newTemp(ty);
2769 switch (sh_how) {
2770 case BITS2(0,0):
2771 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2772 break;
2773 case BITS2(0,1):
2774 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2775 break;
2776 case BITS2(1,0):
2777 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2778 break;
2779 case BITS2(1,1):
2780 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2781 break;
2782 default:
2783 vassert(0);
2784 }
2785 if (invert) {
2786 IRTemp t2 = newTemp(ty);
2787 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2788 return t2;
2789 } else {
2790 return t1;
2791 }
2792 }
2793
2794
2795 static
2796 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2797 UInt insn)
2798 {
2799 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2800
2801 /* ------------------- ADD/SUB(reg) ------------------- */
2802 /* x==0 => 32 bit op x==1 => 64 bit op
2803 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2804
2805 31 30 29 28 23 21 20 15 9 4
2806 | | | | | | | | | |
2807 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2808 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2809 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2810 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2811 */
2812 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2813 UInt bX = INSN(31,31);
2814 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2815 UInt bS = INSN(29, 29); /* set flags? */
2816 UInt sh = INSN(23,22);
2817 UInt rM = INSN(20,16);
2818 UInt imm6 = INSN(15,10);
2819 UInt rN = INSN(9,5);
2820 UInt rD = INSN(4,0);
2821 Bool isSUB = bOP == 1;
2822 Bool is64 = bX == 1;
2823 IRType ty = is64 ? Ity_I64 : Ity_I32;
2824 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2825 /* invalid; fall through */
2826 } else {
2827 IRTemp argL = newTemp(ty);
2828 assign(argL, getIRegOrZR(is64, rN));
2829 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2830 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2831 IRTemp res = newTemp(ty);
2832 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2833 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2834 if (bS) {
2835 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2836 }
2837 DIP("%s%s %s, %s, %s, %s #%u\n",
2838 bOP ? "sub" : "add", bS ? "s" : "",
2839 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2840 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2841 return True;
2842 }
2843 }
2844
2845 /* ------------------- ADC/SBC(reg) ------------------- */
2846 /* x==0 => 32 bit op x==1 => 64 bit op
2847
2848 31 30 29 28 23 21 20 15 9 4
2849 | | | | | | | | | |
2850 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2851 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2852 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2853 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2854 */
2855
2856 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2857 UInt bX = INSN(31,31);
2858 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2859 UInt bS = INSN(29,29); /* set flags */
2860 UInt rM = INSN(20,16);
2861 UInt rN = INSN(9,5);
2862 UInt rD = INSN(4,0);
2863
2864 Bool isSUB = bOP == 1;
2865 Bool is64 = bX == 1;
2866 IRType ty = is64 ? Ity_I64 : Ity_I32;
2867
2868 IRTemp oldC = newTemp(ty);
2869 assign(oldC,
2870 is64 ? mk_arm64g_calculate_flag_c()
2871 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2872
2873 IRTemp argL = newTemp(ty);
2874 assign(argL, getIRegOrZR(is64, rN));
2875 IRTemp argR = newTemp(ty);
2876 assign(argR, getIRegOrZR(is64, rM));
2877
2878 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2879 IRTemp res = newTemp(ty);
2880 if (isSUB) {
2881 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2882 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2883 assign(res,
2884 binop(op,
2885 binop(op, mkexpr(argL), mkexpr(argR)),
2886 binop(xorOp, mkexpr(oldC), one)));
2887 } else {
2888 assign(res,
2889 binop(op,
2890 binop(op, mkexpr(argL), mkexpr(argR)),
2891 mkexpr(oldC)));
2892 }
2893
2894 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2895
2896 if (bS) {
2897 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2898 }
2899
2900 DIP("%s%s %s, %s, %s\n",
2901 bOP ? "sbc" : "adc", bS ? "s" : "",
2902 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2903 nameIRegOrZR(is64, rM));
2904 return True;
2905 }
2906
2907 /* -------------------- LOGIC(reg) -------------------- */
2908 /* x==0 => 32 bit op x==1 => 64 bit op
2909 N==0 => inv? is no-op (no inversion)
2910 N==1 => inv? is NOT
2911 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2912
2913 31 30 28 23 21 20 15 9 4
2914 | | | | | | | | |
2915 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2916 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2917 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2918 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2919 With N=1, the names are: BIC ORN EON BICS
2920 */
2921 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2922 UInt bX = INSN(31,31);
2923 UInt sh = INSN(23,22);
2924 UInt bN = INSN(21,21);
2925 UInt rM = INSN(20,16);
2926 UInt imm6 = INSN(15,10);
2927 UInt rN = INSN(9,5);
2928 UInt rD = INSN(4,0);
2929 Bool is64 = bX == 1;
2930 IRType ty = is64 ? Ity_I64 : Ity_I32;
2931 if (!is64 && imm6 > 31) {
2932 /* invalid; fall through */
2933 } else {
2934 IRTemp argL = newTemp(ty);
2935 assign(argL, getIRegOrZR(is64, rN));
2936 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2937 IROp op = Iop_INVALID;
2938 switch (INSN(30,29)) {
2939 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2940 case BITS2(0,1): op = mkOR(ty); break;
2941 case BITS2(1,0): op = mkXOR(ty); break;
2942 default: vassert(0);
2943 }
2944 IRTemp res = newTemp(ty);
2945 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2946 if (INSN(30,29) == BITS2(1,1)) {
2947 setFlags_LOGIC(is64, res);
2948 }
2949 putIRegOrZR(is64, rD, mkexpr(res));
2950
2951 static const HChar* names_op[8]
2952 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2953 vassert(((bN << 2) | INSN(30,29)) < 8);
2954 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2955 /* Special-case the printing of "MOV" */
2956 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2957 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2958 nameIRegOrZR(is64, rM));
2959 } else {
2960 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2961 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2962 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2963 }
2964 return True;
2965 }
2966 }
2967
2968 /* -------------------- {U,S}MULH -------------------- */
2969 /* 31 23 22 20 15 9 4
2970 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2971 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2972 */
2973 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2974 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2975 Bool isU = INSN(23,23) == 1;
2976 UInt mm = INSN(20,16);
2977 UInt nn = INSN(9,5);
2978 UInt dd = INSN(4,0);
2979 putIReg64orZR(dd, unop(Iop_128HIto64,
2980 binop(isU ? Iop_MullU64 : Iop_MullS64,
2981 getIReg64orZR(nn), getIReg64orZR(mm))));
2982 DIP("%cmulh %s, %s, %s\n",
2983 isU ? 'u' : 's',
2984 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2985 return True;
2986 }
2987
2988 /* -------------------- M{ADD,SUB} -------------------- */
2989 /* 31 30 20 15 14 9 4
2990 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2991 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2992 */
2993 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2994 Bool is64 = INSN(31,31) == 1;
2995 UInt mm = INSN(20,16);
2996 Bool isAdd = INSN(15,15) == 0;
2997 UInt aa = INSN(14,10);
2998 UInt nn = INSN(9,5);
2999 UInt dd = INSN(4,0);
3000 if (is64) {
3001 putIReg64orZR(
3002 dd,
3003 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3004 getIReg64orZR(aa),
3005 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3006 } else {
3007 putIReg32orZR(
3008 dd,
3009 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3010 getIReg32orZR(aa),
3011 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3012 }
3013 DIP("%s %s, %s, %s, %s\n",
3014 isAdd ? "madd" : "msub",
3015 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3016 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3017 return True;
3018 }
3019
3020 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3021 /* 31 30 28 20 15 11 9 4
3022 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3023 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3024 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3025 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3026 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3027 */
3028 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3029 Bool is64 = INSN(31,31) == 1;
3030 UInt b30 = INSN(30,30);
3031 UInt mm = INSN(20,16);
3032 UInt cond = INSN(15,12);
3033 UInt b10 = INSN(10,10);
3034 UInt nn = INSN(9,5);
3035 UInt dd = INSN(4,0);
3036 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3037 IRType ty = is64 ? Ity_I64 : Ity_I32;
3038 IRExpr* argL = getIRegOrZR(is64, nn);
3039 IRExpr* argR = getIRegOrZR(is64, mm);
3040 switch (op) {
3041 case BITS2(0,0):
3042 break;
3043 case BITS2(0,1):
3044 argR = binop(mkADD(ty), argR, mkU(ty,1));
3045 break;
3046 case BITS2(1,0):
3047 argR = unop(mkNOT(ty), argR);
3048 break;
3049 case BITS2(1,1):
3050 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3051 break;
3052 default:
3053 vassert(0);
3054 }
3055 putIRegOrZR(
3056 is64, dd,
3057 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3058 argL, argR)
3059 );
3060 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3061 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3062 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3063 nameIRegOrZR(is64, mm), nameCC(cond));
3064 return True;
3065 }
3066
3067 /* -------------- ADD/SUB(extended reg) -------------- */
3068 /* 28 20 15 12 9 4
3069 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3070 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3071
3072 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3073 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3074
3075 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3076 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3077
3078 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3079 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3080
3081 The 'm' operand is extended per opt, thusly:
3082
3083 000 Xm & 0xFF UXTB
3084 001 Xm & 0xFFFF UXTH
3085 010 Xm & (2^32)-1 UXTW
3086 011 Xm UXTX
3087
3088 100 Xm sx from bit 7 SXTB
3089 101 Xm sx from bit 15 SXTH
3090 110 Xm sx from bit 31 SXTW
3091 111 Xm SXTX
3092
3093 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3094 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3095 are the identity operation on Wm.
3096
3097 After extension, the value is shifted left by imm3 bits, which
3098 may only be in the range 0 .. 4 inclusive.
3099 */
3100 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3101 Bool is64 = INSN(31,31) == 1;
3102 Bool isSub = INSN(30,30) == 1;
3103 Bool setCC = INSN(29,29) == 1;
3104 UInt mm = INSN(20,16);
3105 UInt opt = INSN(15,13);
3106 UInt imm3 = INSN(12,10);
3107 UInt nn = INSN(9,5);
3108 UInt dd = INSN(4,0);
3109 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3110 "sxtb", "sxth", "sxtw", "sxtx" };
3111 /* Do almost the same thing in the 32- and 64-bit cases. */
3112 IRTemp xN = newTemp(Ity_I64);
3113 IRTemp xM = newTemp(Ity_I64);
3114 assign(xN, getIReg64orSP(nn));
3115 assign(xM, getIReg64orZR(mm));
3116 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3117 Int shSX = 0;
3118 /* widen Xm .. */
3119 switch (opt) {
3120 case BITS3(0,0,0): // UXTB
3121 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3122 case BITS3(0,0,1): // UXTH
3123 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3124 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3125 if (is64) {
3126 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3127 }
3128 break;
3129 case BITS3(0,1,1): // UXTX -- always a noop
3130 break;
3131 case BITS3(1,0,0): // SXTB
3132 shSX = 56; goto sxTo64;
3133 case BITS3(1,0,1): // SXTH
3134 shSX = 48; goto sxTo64;
3135 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3136 if (is64) {
3137 shSX = 32; goto sxTo64;
3138 }
3139 break;
3140 case BITS3(1,1,1): // SXTX -- always a noop
3141 break;
3142 sxTo64:
3143 vassert(shSX >= 32);
3144 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3145 mkU8(shSX));
3146 break;
3147 default:
3148 vassert(0);
3149 }
3150 /* and now shift */
3151 IRTemp argL = xN;
3152 IRTemp argR = newTemp(Ity_I64);
3153 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3154 IRTemp res = newTemp(Ity_I64);
3155 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3156 mkexpr(argL), mkexpr(argR)));
3157 if (is64) {
3158 if (setCC) {
3159 putIReg64orZR(dd, mkexpr(res));
3160 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3161 } else {
3162 putIReg64orSP(dd, mkexpr(res));
3163 }
3164 } else {
3165 if (setCC) {
3166 IRTemp argL32 = newTemp(Ity_I32);
3167 IRTemp argR32 = newTemp(Ity_I32);
3168 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3169 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3170 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3171 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3172 } else {
3173 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3174 }
3175 }
3176 DIP("%s%s %s, %s, %s %s lsl %u\n",
3177 isSub ? "sub" : "add", setCC ? "s" : "",
3178 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3179 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3180 nameExt[opt], imm3);
3181 return True;
3182 }
3183
3184 /* ---------------- CCMP/CCMN(imm) ---------------- */
3185 /* Bizarrely, these appear in the "data processing register"
3186 category, even though they are operations against an
3187 immediate. */
3188 /* 31 29 20 15 11 9 3
3189 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3190 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3191
3192 Operation is:
3193 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3194 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3195 */
3196 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3197 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3198 Bool is64 = INSN(31,31) == 1;
3199 Bool isSUB = INSN(30,30) == 1;
3200 UInt imm5 = INSN(20,16);
3201 UInt cond = INSN(15,12);
3202 UInt nn = INSN(9,5);
3203 UInt nzcv = INSN(3,0);
3204
3205 IRTemp condT = newTemp(Ity_I1);
3206 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3207
3208 IRType ty = is64 ? Ity_I64 : Ity_I32;
3209 IRTemp argL = newTemp(ty);
3210 IRTemp argR = newTemp(ty);
3211
3212 if (is64) {
3213 assign(argL, getIReg64orZR(nn));
3214 assign(argR, mkU64(imm5));
3215 } else {
3216 assign(argL, getIReg32orZR(nn));
3217 assign(argR, mkU32(imm5));
3218 }
3219 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3220
3221 DIP("ccm%c %s, #%u, #%u, %s\n",
3222 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3223 imm5, nzcv, nameCC(cond));
3224 return True;
3225 }
3226
3227 /* ---------------- CCMP/CCMN(reg) ---------------- */
3228 /* 31 29 20 15 11 9 3
3229 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3230 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3231 Operation is:
3232 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3233 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3234 */
3235 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3236 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3237 Bool is64 = INSN(31,31) == 1;
3238 Bool isSUB = INSN(30,30) == 1;
3239 UInt mm = INSN(20,16);
3240 UInt cond = INSN(15,12);
3241 UInt nn = INSN(9,5);
3242 UInt nzcv = INSN(3,0);
3243
3244 IRTemp condT = newTemp(Ity_I1);
3245 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3246
3247 IRType ty = is64 ? Ity_I64 : Ity_I32;
3248 IRTemp argL = newTemp(ty);
3249 IRTemp argR = newTemp(ty);
3250
3251 if (is64) {
3252 assign(argL, getIReg64orZR(nn));
3253 assign(argR, getIReg64orZR(mm));
3254 } else {
3255 assign(argL, getIReg32orZR(nn));
3256 assign(argR, getIReg32orZR(mm));
3257 }
3258 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3259
3260 DIP("ccm%c %s, %s, #%u, %s\n",
3261 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3262 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3263 return True;
3264 }
3265
3266
3267 /* -------------- REV/REV16/REV32/RBIT -------------- */
3268 /* 31 30 28 20 15 11 9 4
3269
3270 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3271 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3272
3273 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3274 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3275
3276 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3277 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3278
3279 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3280 */
3281 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3282 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3283 UInt b31 = INSN(31,31);
3284 UInt opc = INSN(11,10);
3285
3286 UInt ix = 0;
3287 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3288 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3289 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3290 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3291 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3292 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3293 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3294 if (ix >= 1 && ix <= 7) {
3295 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3296 UInt nn = INSN(9,5);
3297 UInt dd = INSN(4,0);
3298 IRTemp src = newTemp(Ity_I64);
3299 IRTemp dst = IRTemp_INVALID;
3300 IRTemp (*math)(IRTemp) = NULL;
3301 switch (ix) {
3302 case 1: case 2: math = math_BYTESWAP64; break;
3303 case 3: case 4: math = math_BITSWAP64; break;
3304 case 5: case 6: math = math_USHORTSWAP64; break;
3305 case 7: math = math_UINTSWAP64; break;
3306 default: vassert(0);
3307 }
3308 const HChar* names[7]
3309 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3310 const HChar* nm = names[ix-1];
3311 vassert(math);
3312 if (ix == 6) {
3313 /* This has to be special cased, since the logic below doesn't
3314 handle it correctly. */
3315 assign(src, getIReg64orZR(nn));
3316 dst = math(src);
3317 putIReg64orZR(dd,
3318 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3319 } else if (is64) {
3320 assign(src, getIReg64orZR(nn));
3321 dst = math(src);
3322 putIReg64orZR(dd, mkexpr(dst));
3323 } else {
3324 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3325 dst = math(src);
3326 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3327 }
3328 DIP("%s %s, %s\n", nm,
3329 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3330 return True;
3331 }
3332 /* else fall through */
3333 }
3334
3335 /* -------------------- CLZ/CLS -------------------- */
3336 /* 30 28 24 20 15 9 4
3337 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3338 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3339 */
3340 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3341 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3342 Bool is64 = INSN(31,31) == 1;
3343 Bool isCLS = INSN(10,10) == 1;
3344 UInt nn = INSN(9,5);
3345 UInt dd = INSN(4,0);
3346 IRTemp src = newTemp(Ity_I64);
3347 IRTemp srcZ = newTemp(Ity_I64);
3348 IRTemp dst = newTemp(Ity_I64);
3349 /* Get the argument, widened out to 64 bit */
3350 if (is64) {
3351 assign(src, getIReg64orZR(nn));
3352 } else {
3353 assign(src, binop(Iop_Shl64,
3354 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3355 }
3356 /* If this is CLS, mash the arg around accordingly */
3357 if (isCLS) {
3358 IRExpr* one = mkU8(1);
3359 assign(srcZ,
3360 binop(Iop_Xor64,
3361 binop(Iop_Shl64, mkexpr(src), one),
3362 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3363 } else {
3364 assign(srcZ, mkexpr(src));
3365 }
3366 /* And compute CLZ. */
3367 if (is64) {
3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369 mkU64(isCLS ? 63 : 64),
3370 unop(Iop_Clz64, mkexpr(srcZ))));
3371 putIReg64orZR(dd, mkexpr(dst));
3372 } else {
3373 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3374 mkU64(isCLS ? 31 : 32),
3375 unop(Iop_Clz64, mkexpr(srcZ))));
3376 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3377 }
3378 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3379 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3380 return True;
3381 }
3382
3383 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3384 /* 30 28 20 15 11 9 4
3385 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3386 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3387 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3388 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3389 */
3390 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3391 && INSN(15,12) == BITS4(0,0,1,0)) {
3392 Bool is64 = INSN(31,31) == 1;
3393 UInt mm = INSN(20,16);
3394 UInt op = INSN(11,10);
3395 UInt nn = INSN(9,5);
3396 UInt dd = INSN(4,0);
3397 IRType ty = is64 ? Ity_I64 : Ity_I32;
3398 IRTemp srcL = newTemp(ty);
3399 IRTemp srcR = newTemp(Ity_I64);
3400 IRTemp res = newTemp(ty);
3401 IROp iop = Iop_INVALID;
3402 assign(srcL, getIRegOrZR(is64, nn));
3403 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3404 mkU64(is64 ? 63 : 31)));
3405 if (op < 3) {
3406 // LSLV, LSRV, ASRV
3407 switch (op) {
3408 case BITS2(0,0): iop = mkSHL(ty); break;
3409 case BITS2(0,1): iop = mkSHR(ty); break;
3410 case BITS2(1,0): iop = mkSAR(ty); break;
3411 default: vassert(0);
3412 }
3413 assign(res, binop(iop, mkexpr(srcL),
3414 unop(Iop_64to8, mkexpr(srcR))));
3415 } else {
3416 // RORV
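// In outline: ror(x, s) = (x << (w - s)) | (x >> s) for 0 < s < w,
// where w is 32 or 64.  The s == 0 case is split out via the ITE
// because it would otherwise need a shift by the full register width
// (w - 0), which the IR shift ops are not guaranteed to handle.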
3417 IROp opSHL = mkSHL(ty);
3418 IROp opSHR = mkSHR(ty);
3419 IROp opOR = mkOR(ty);
3420 IRExpr* width = mkU64(is64 ? 64: 32);
3421 assign(
3422 res,
3423 IRExpr_ITE(
3424 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3425 mkexpr(srcL),
3426 binop(opOR,
3427 binop(opSHL,
3428 mkexpr(srcL),
3429 unop(Iop_64to8, binop(Iop_Sub64, width,
3430 mkexpr(srcR)))),
3431 binop(opSHR,
3432 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3433 ));
3434 }
3435 putIRegOrZR(is64, dd, mkexpr(res));
3436 vassert(op < 4);
3437 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3438 DIP("%s %s, %s, %s\n",
3439 names[op], nameIRegOrZR(is64,dd),
3440 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3441 return True;
3442 }
3443
3444 /* -------------------- SDIV/UDIV -------------------- */
3445 /* 30 28 20 15 10 9 4
3446 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3447 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3448 */
3449 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3450 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3451 Bool is64 = INSN(31,31) == 1;
3452 UInt mm = INSN(20,16);
3453 Bool isS = INSN(10,10) == 1;
3454 UInt nn = INSN(9,5);
3455 UInt dd = INSN(4,0);
3456 if (isS) {
3457 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3458 getIRegOrZR(is64, nn),
3459 getIRegOrZR(is64, mm)));
3460 } else {
3461 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3462 getIRegOrZR(is64, nn),
3463 getIRegOrZR(is64, mm)));
3464 }
3465 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3466 nameIRegOrZR(is64, dd),
3467 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3468 return True;
3469 }
3470
3471 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3472 /* 31 23 20 15 14 9 4
3473 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3474 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3475 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3476 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3477 with operation
3478 Xd = Xa +/- (Wn *u/s Wm)
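      e.g. UMADDL with Wn = 0xFFFFFFFF, Wm = 2, Xa = 1 forms the full
      widening product 0x1FFFFFFFE and gives Xd = 0x1FFFFFFFF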
3479 */
3480 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3481 Bool isU = INSN(23,23) == 1;
3482 UInt mm = INSN(20,16);
3483 Bool isAdd = INSN(15,15) == 0;
3484 UInt aa = INSN(14,10);
3485 UInt nn = INSN(9,5);
3486 UInt dd = INSN(4,0);
3487 IRTemp wN = newTemp(Ity_I32);
3488 IRTemp wM = newTemp(Ity_I32);
3489 IRTemp xA = newTemp(Ity_I64);
3490 IRTemp muld = newTemp(Ity_I64);
3491 IRTemp res = newTemp(Ity_I64);
3492 assign(wN, getIReg32orZR(nn));
3493 assign(wM, getIReg32orZR(mm));
3494 assign(xA, getIReg64orZR(aa));
3495 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3496 mkexpr(wN), mkexpr(wM)));
3497 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3498 mkexpr(xA), mkexpr(muld)));
3499 putIReg64orZR(dd, mkexpr(res));
3500 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3501 nameIReg64orZR(dd), nameIReg32orZR(nn),
3502 nameIReg32orZR(mm), nameIReg64orZR(aa));
3503 return True;
3504 }
3505
3506 /* -------------------- CRC32/CRC32C -------------------- */
3507 /* 31 30 20 15 11 9 4
3508 sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
3509 sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
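      sz gives the width of the Rm data: 00=byte, 01=halfword, 10=word,
      11=doubleword; sz == 11 is only valid with sf == 1, the others
      only with sf == 0.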
3510 */
3511 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3512 && INSN(15,13) == BITS3(0,1,0)) {
3513 UInt bitSF = INSN(31,31);
3514 UInt mm = INSN(20,16);
3515 UInt bitC = INSN(12,12);
3516 UInt sz = INSN(11,10);
3517 UInt nn = INSN(9,5);
3518 UInt dd = INSN(4,0);
3519 vassert(sz >= 0 && sz <= 3);
3520 if ((bitSF == 0 && sz <= BITS2(1,0))
3521 || (bitSF == 1 && sz == BITS2(1,1))) {
3522 UInt ix = (bitC == 1 ? 4 : 0) | sz;
3523 void* helpers[8]
3524 = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
3525 &arm64g_calc_crc32w, &arm64g_calc_crc32x,
3526 &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
3527 &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
3528 const HChar* hNames[8]
3529 = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
3530 "arm64g_calc_crc32w", "arm64g_calc_crc32x",
3531 "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
3532 "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
3533 const HChar* iNames[8]
3534 = { "crc32b", "crc32h", "crc32w", "crc32x",
3535 "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
3536
3537 IRTemp srcN = newTemp(Ity_I64);
3538 assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
3539
3540 IRTemp srcM = newTemp(Ity_I64);
3541 IRExpr* at64 = getIReg64orZR(mm);
3542 switch (sz) {
3543 case BITS2(0,0):
3544 assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
3545 case BITS2(0,1):
3546 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
3547 case BITS2(1,0):
3548 assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
3549 case BITS2(1,1):
3550 assign(srcM, at64); break;
3551 default:
3552 vassert(0);
3553 }
3554
3555 vassert(ix >= 0 && ix <= 7);
3556
3557 putIReg64orZR(
3558 dd,
3559 unop(Iop_32Uto64,
3560 unop(Iop_64to32,
3561 mkIRExprCCall(Ity_I64, 0/*regparm*/,
3562 hNames[ix], helpers[ix],
3563 mkIRExprVec_2(mkexpr(srcN),
3564 mkexpr(srcM))))));
3565
3566 DIP("%s %s, %s, %s\n", iNames[ix],
3567 nameIReg32orZR(dd),
3568 nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
3569 return True;
3570 }
3571 /* fall through */
3572 }
3573
3574 vex_printf("ARM64 front end: data_processing_register\n");
3575 return False;
3576 # undef INSN
3577 }
3578
3579
3580 /*------------------------------------------------------------*/
3581 /*--- Math helpers for vector interleave/deinterleave ---*/
3582 /*------------------------------------------------------------*/
3583
3584 #define EX(_tmp) \
3585 mkexpr(_tmp)
3586 #define SL(_hi128,_lo128,_nbytes) \
3587 ( (_nbytes) == 0 \
3588 ? (_lo128) \
3589 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3590 #define ROR(_v128,_nbytes) \
3591 SL((_v128),(_v128),(_nbytes))
3592 #define ROL(_v128,_nbytes) \
3593 SL((_v128),(_v128),16-(_nbytes))
3594 #define SHR(_v128,_nbytes) \
3595 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3596 #define SHL(_v128,_nbytes) \
3597 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3598 #define ILO64x2(_argL,_argR) \
3599 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3600 #define IHI64x2(_argL,_argR) \
3601 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3602 #define ILO32x4(_argL,_argR) \
3603 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3604 #define IHI32x4(_argL,_argR) \
3605 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3606 #define ILO16x8(_argL,_argR) \
3607 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3608 #define IHI16x8(_argL,_argR) \
3609 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3610 #define ILO8x16(_argL,_argR) \
3611 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3612 #define IHI8x16(_argL,_argR) \
3613 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3614 #define CEV32x4(_argL,_argR) \
3615 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3616 #define COD32x4(_argL,_argR) \
3617 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3618 #define COD16x8(_argL,_argR) \
3619 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3620 #define COD8x16(_argL,_argR) \
3621 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3622 #define CEV8x16(_argL,_argR) \
3623 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3624 #define AND(_arg1,_arg2) \
3625 binop(Iop_AndV128,(_arg1),(_arg2))
3626 #define OR2(_arg1,_arg2) \
3627 binop(Iop_OrV128,(_arg1),(_arg2))
3628 #define OR3(_arg1,_arg2,_arg3) \
3629 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3630 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3631 binop(Iop_OrV128, \
3632 binop(Iop_OrV128,(_arg1),(_arg2)), \
3633 binop(Iop_OrV128,(_arg3),(_arg4)))
3634
3635
3636 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3637 static
3638 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3639 UInt laneSzBlg2, IRTemp u0 )
3640 {
3641 assign(*i0, mkexpr(u0));
3642 }
3643
3644
3645 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3646 static
3647 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3648 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3649 {
3650 /* This is pretty easy, since we have primitives directly to
3651 hand. */
3652 if (laneSzBlg2 == 3) {
3653 // 64x2
3654 // u1 == B1 B0, u0 == A1 A0
3655 // i1 == B1 A1, i0 == B0 A0
3656 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3657 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3658 return;
3659 }
3660 if (laneSzBlg2 == 2) {
3661 // 32x4
3662 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3663 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3664 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3665 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3666 return;
3667 }
3668 if (laneSzBlg2 == 1) {
3669 // 16x8
3670 // u1 == B{7..0}, u0 == A{7..0}
3671 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3672 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3673 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3674 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3675 return;
3676 }
3677 if (laneSzBlg2 == 0) {
3678 // 8x16
3679 // u1 == B{f..0}, u0 == A{f..0}
3680 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3681 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3682 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3683 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3684 return;
3685 }
3686 /*NOTREACHED*/
3687 vassert(0);
3688 }
3689
3690
3691 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3692 static
3693 void math_INTERLEAVE3_128(
3694 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3695 UInt laneSzBlg2,
3696 IRTemp u0, IRTemp u1, IRTemp u2 )
3697 {
3698 if (laneSzBlg2 == 3) {
3699 // 64x2
3700 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3701 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3702 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3703 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3704 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3705 return;
3706 }
3707
3708 if (laneSzBlg2 == 2) {
3709 // 32x4
3710 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3711 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3712 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3713 IRTemp p0 = newTempV128();
3714 IRTemp p1 = newTempV128();
3715 IRTemp p2 = newTempV128();
3716 IRTemp c1100 = newTempV128();
3717 IRTemp c0011 = newTempV128();
3718 IRTemp c0110 = newTempV128();
3719 assign(c1100, mkV128(0xFF00));
3720 assign(c0011, mkV128(0x00FF));
3721 assign(c0110, mkV128(0x0FF0));
3722 // First interleave them at 64x2 granularity,
3723 // generating partial ("p") values.
3724 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3725 // And more shuffling around for the final answer
3726 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3727 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3728 assign(*i1, OR3( SHL(EX(p2),12),
3729 AND(EX(p1),EX(c0110)),
3730 SHR(EX(p0),12) ));
3731 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3732 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3733 return;
3734 }
3735
3736 if (laneSzBlg2 == 1) {
3737 // 16x8
3738 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3739 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3740 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3741 //
3742 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3743 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3744 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3745 //
3746 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3747 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3748 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3749 IRTemp p0 = newTempV128();
3750 IRTemp p1 = newTempV128();
3751 IRTemp p2 = newTempV128();
3752 IRTemp c1000 = newTempV128();
3753 IRTemp c0100 = newTempV128();
3754 IRTemp c0010 = newTempV128();
3755 IRTemp c0001 = newTempV128();
3756 assign(c1000, mkV128(0xF000));
3757 assign(c0100, mkV128(0x0F00));
3758 assign(c0010, mkV128(0x00F0));
3759 assign(c0001, mkV128(0x000F));
3760 // First interleave them at 32x4 granularity,
3761 // generating partial ("p") values.
3762 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3763 // And more shuffling around for the final answer
3764 assign(*i2,
3765 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3766 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3767 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3768 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3769 ));
3770 assign(*i1,
3771 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3772 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3773 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3774 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3775 ));
3776 assign(*i0,
3777 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3778 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3779 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3780 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3781 ));
3782 return;
3783 }
3784
3785 if (laneSzBlg2 == 0) {
3786 // 8x16. It doesn't seem worth the hassle of first doing a
3787 // 16x8 interleave, so just generate all 24 partial results
3788 // directly :-(
3789 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3790 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3791 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3792 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3793
3794 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3795 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3796 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3797 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3798 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3799 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3800 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3801 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3802 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3803
3804 // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
3805 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3806 //
3807 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3808 IRTemp t_##_tempName = newTempV128(); \
3809 assign(t_##_tempName, \
3810 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3811 ROR(EX(_srcVec2),(_srcShift2)) ) )
3812
3813 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3814 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3815
3816 // The slicing and reassembly are done as interleavedly as possible,
3817 // so as to minimise the demand for registers in the back end, which
3818 // was observed to be a problem in testing.
3819
3820 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3821 XXXX(AfCe, AA, 0xf, CC, 0xe);
3822 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3823
3824 XXXX(BeAe, BB, 0xe, AA, 0xe);
3825 XXXX(CdBd, CC, 0xd, BB, 0xd);
3826 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3827 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3828
3829 XXXX(AdCc, AA, 0xd, CC, 0xc);
3830 XXXX(BcAc, BB, 0xc, AA, 0xc);
3831 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3832
3833 XXXX(CbBb, CC, 0xb, BB, 0xb);
3834 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3835 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3836 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3837 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3838
3839 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3840 XXXX(C9B9, CC, 0x9, BB, 0x9);
3841 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3842
3843 XXXX(A9C8, AA, 0x9, CC, 0x8);
3844 XXXX(B8A8, BB, 0x8, AA, 0x8);
3845 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3846 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3847
3848 XXXX(C7B7, CC, 0x7, BB, 0x7);
3849 XXXX(A7C6, AA, 0x7, CC, 0x6);
3850 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3851
3852 XXXX(B6A6, BB, 0x6, AA, 0x6);
3853 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3854 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3855 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3856 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3857
3858 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3859 XXXX(B4A4, BB, 0x4, AA, 0x4);
3860 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3861
3862 XXXX(C3B3, CC, 0x3, BB, 0x3);
3863 XXXX(A3C2, AA, 0x3, CC, 0x2);
3864 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3865 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3866
3867 XXXX(B2A2, BB, 0x2, AA, 0x2);
3868 XXXX(C1B1, CC, 0x1, BB, 0x1);
3869 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3870
3871 XXXX(A1C0, AA, 0x1, CC, 0x0);
3872 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3873 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3874 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3875 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3876
3877 # undef XXXX
3878 return;
3879 }
3880
3881 /*NOTREACHED*/
3882 vassert(0);
3883 }
3884
3885
3886 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3887 static
3888 void math_INTERLEAVE4_128(
3889 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3890 UInt laneSzBlg2,
3891 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3892 {
3893 if (laneSzBlg2 == 3) {
3894 // 64x2
3895 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3896 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3897 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3898 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3899 return;
3900 }
3901 if (laneSzBlg2 == 2) {
3902 // 32x4
3903 // First, interleave at the 64-bit lane size.
3904 IRTemp p0 = newTempV128();
3905 IRTemp p1 = newTempV128();
3906 IRTemp p2 = newTempV128();
3907 IRTemp p3 = newTempV128();
3908 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3909 // And interleave (cat) at the 32 bit size.
3910 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3911 assign(*i1, COD32x4(EX(p1), EX(p0)));
3912 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3913 assign(*i3, COD32x4(EX(p3), EX(p2)));
3914 return;
3915 }
3916 if (laneSzBlg2 == 1) {
3917 // 16x8
3918 // First, interleave at the 32-bit lane size.
3919 IRTemp p0 = newTempV128();
3920 IRTemp p1 = newTempV128();
3921 IRTemp p2 = newTempV128();
3922 IRTemp p3 = newTempV128();
3923 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3924 // And rearrange within each vector, to get the right 16 bit lanes.
3925 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3926 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3927 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3928 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3929 return;
3930 }
3931 if (laneSzBlg2 == 0) {
3932 // 8x16
3933 // First, interleave at the 16-bit lane size.
3934 IRTemp p0 = newTempV128();
3935 IRTemp p1 = newTempV128();
3936 IRTemp p2 = newTempV128();
3937 IRTemp p3 = newTempV128();
3938 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3939 // And rearrange within each vector, to get the right 8 bit lanes.
3940 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3941 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3942 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3943 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3944 return;
3945 }
3946 /*NOTREACHED*/
3947 vassert(0);
3948 }
3949
3950
3951 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3952 static
3953 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3954 UInt laneSzBlg2, IRTemp i0 )
3955 {
3956 assign(*u0, mkexpr(i0));
3957 }
3958
3959
3960 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3961 static
3962 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3963 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3964 {
3965 /* This is pretty easy, since we have primitives directly to
3966 hand. */
3967 if (laneSzBlg2 == 3) {
3968 // 64x2
3969 // i1 == B1 A1, i0 == B0 A0
3970 // u1 == B1 B0, u0 == A1 A0
3971 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3972 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3973 return;
3974 }
3975 if (laneSzBlg2 == 2) {
3976 // 32x4
3977 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3978 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3979 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3980 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3981 return;
3982 }
3983 if (laneSzBlg2 == 1) {
3984 // 16x8
3985 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3986 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3987 // u1 == B{7..0}, u0 == A{7..0}
3988 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3989 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
3990 return;
3991 }
3992 if (laneSzBlg2 == 0) {
3993 // 8x16
3994 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3995 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3996 // u1 == B{f..0}, u0 == A{f..0}
3997 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3998 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
3999 return;
4000 }
4001 /*NOTREACHED*/
4002 vassert(0);
4003 }
4004
4005
4006 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
4007 static
4008 void math_DEINTERLEAVE3_128(
4009 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4010 UInt laneSzBlg2,
4011 IRTemp i0, IRTemp i1, IRTemp i2 )
4012 {
4013 if (laneSzBlg2 == 3) {
4014 // 64x2
4015 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
4016 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
4017 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
4018 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
4019 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
4020 return;
4021 }
4022
4023 if (laneSzBlg2 == 2) {
4024 // 32x4
4025 // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
4026 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
4027 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
4028 IRTemp t_a1c0b0a0 = newTempV128();
4029 IRTemp t_a2c1b1a1 = newTempV128();
4030 IRTemp t_a3c2b2a2 = newTempV128();
4031 IRTemp t_a0c3b3a3 = newTempV128();
4032 IRTemp p0 = newTempV128();
4033 IRTemp p1 = newTempV128();
4034 IRTemp p2 = newTempV128();
4035 // Compute some intermediate values.
4036 assign(t_a1c0b0a0, EX(i0));
4037 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
4038 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
4039 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
4040 // First deinterleave into lane-pairs
4041 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
4042 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
4043 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
4044 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
4045 // Then deinterleave at 64x2 granularity.
4046 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
4047 return;
4048 }
4049
4050 if (laneSzBlg2 == 1) {
4051 // 16x8
4052 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
4053 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
4054 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
4055 //
4056 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
4057 // i1 == A5 C4 B4 A4 C3 B3 A3 C2
4058 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
4059 //
4060 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
4061 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
4062 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
4063
4064 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
4065 s0 = s1 = s2 = s3
4066 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
4067 newTempsV128_4(&s0, &s1, &s2, &s3);
4068 newTempsV128_4(&t0, &t1, &t2, &t3);
4069 newTempsV128_4(&p0, &p1, &p2, &c00111111);
4070
4071 // s0 == b2a2 c1b1a1 c0b0a0
4072 // s1 == b4a4 c3b3a3 c2b2a2
4073 // s2 == b6a6 c5b5a5 c4b4a4
4074 // s3 == b0a0 c7b7a7 c6b6a6
4075 assign(s0, EX(i0));
4076 assign(s1, SL(EX(i1),EX(i0),6*2));
4077 assign(s2, SL(EX(i2),EX(i1),4*2));
4078 assign(s3, SL(EX(i0),EX(i2),2*2));
4079
4080 // t0 == 0 0 c1c0 b1b0 a1a0
4081 // t1 == 0 0 c3c2 b3b2 a3a2
4082 // t2 == 0 0 c5c4 b5b4 a5a4
4083 // t3 == 0 0 c7c6 b7b6 a7a6
4084 assign(c00111111, mkV128(0x0FFF));
4085 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4086 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4087 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4088 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4089
4090 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4091 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4092 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4093
4094 // Then deinterleave at 32x4 granularity.
4095 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4096 return;
4097 }
4098
4099 if (laneSzBlg2 == 0) {
4100 // 8x16. This is the same scheme as for 16x8, with twice the
4101 // number of intermediate values.
4102 //
4103 // u2 == C{f..0}
4104 // u1 == B{f..0}
4105 // u0 == A{f..0}
4106 //
4107 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4108 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4109 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4110 //
4111 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4112 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4113 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4114 //
4115 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4116 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4117 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4118 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4119 = IRTemp_INVALID;
4120 newTempsV128_4(&s0, &s1, &s2, &s3);
4121 newTempsV128_4(&s4, &s5, &s6, &s7);
4122 newTempsV128_4(&t0, &t1, &t2, &t3);
4123 newTempsV128_4(&t4, &t5, &t6, &t7);
4124 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4125
4126 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4127 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4128 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4129 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4130 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4131 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4132 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4133 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4134 assign(s0, SL(EX(i1),EX(i0), 0));
4135 assign(s1, SL(EX(i1),EX(i0), 6));
4136 assign(s2, SL(EX(i1),EX(i0),12));
4137 assign(s3, SL(EX(i2),EX(i1), 2));
4138 assign(s4, SL(EX(i2),EX(i1), 8));
4139 assign(s5, SL(EX(i2),EX(i1),14));
4140 assign(s6, SL(EX(i0),EX(i2), 4));
4141 assign(s7, SL(EX(i0),EX(i2),10));
4142
4143 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4144 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4145 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4146 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4147 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4148 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4149 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4150 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4151 assign(cMASK, mkV128(0x003F));
4152 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4153 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4154 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4155 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4156 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4157 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4158 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4159 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4160
4161 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4162 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4163 SHL(EX(t3),2), SHR(EX(t2),4) ));
4164 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4165
4166 // Then deinterleave at 16x8 granularity.
4167 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4168 return;
4169 }
4170
4171 /*NOTREACHED*/
4172 vassert(0);
4173 }
4174
4175
4176 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4177 static
4178 void math_DEINTERLEAVE4_128(
4179 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4180 UInt laneSzBlg2,
4181 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4182 {
4183 if (laneSzBlg2 == 3) {
4184 // 64x2
4185 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4186 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4187 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4188 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4189 return;
4190 }
4191 if (laneSzBlg2 == 2) {
4192 // 32x4
4193 IRTemp p0 = newTempV128();
4194 IRTemp p2 = newTempV128();
4195 IRTemp p1 = newTempV128();
4196 IRTemp p3 = newTempV128();
4197 assign(p0, ILO32x4(EX(i1), EX(i0)));
4198 assign(p1, IHI32x4(EX(i1), EX(i0)));
4199 assign(p2, ILO32x4(EX(i3), EX(i2)));
4200 assign(p3, IHI32x4(EX(i3), EX(i2)));
4201 // And now do what we did for the 64-bit case.
4202 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4203 return;
4204 }
4205 if (laneSzBlg2 == 1) {
4206 // 16x8
4207 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4208 IRTemp p0 = newTempV128();
4209 IRTemp p1 = newTempV128();
4210 IRTemp p2 = newTempV128();
4211 IRTemp p3 = newTempV128();
4212 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4213 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4214 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4215 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4216 // From here on is like the 32 bit case.
4217 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4218 return;
4219 }
4220 if (laneSzBlg2 == 0) {
4221 // 8x16
4222 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4223 IRTemp p0 = newTempV128();
4224 IRTemp p1 = newTempV128();
4225 IRTemp p2 = newTempV128();
4226 IRTemp p3 = newTempV128();
4227 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4228 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4229 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4230 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4231 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4232 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4233 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4234 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4235 // From here on is like the 16 bit case.
4236 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4237 return;
4238 }
4239 /*NOTREACHED*/
4240 vassert(0);
4241 }
4242
4243
4244 /* Wrappers that use the full-width (de)interleavers to do half-width
4245 (de)interleaving. The scheme is to clone each input lane in the
4246 lower half of each incoming value, do a full width (de)interleave
4247 at the next lane size up, and remove every other lane of the
4248 result. The returned values may have any old junk in the upper
4249 64 bits -- the caller must ignore that. */
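/* Sketch of the scheme for 32-bit lanes (laneSzBlg2 == 2), interleaving
   two values whose low halves are u0 = [A1 A0] and u1 = [B1 B0]:
   the "doubler" (InterleaveLO32x4) turns them into [A1 A1 A0 A0] and
   [B1 B1 B0 B0]; the full-width 64x2 interleave then produces
   [B0 B0 A0 A0] and [B1 B1 A1 A1]; and the "halver" (CatEvenLanes32x4)
   drops the duplicates, leaving [B0 A0] and [B1 A1] in the low 64 bits,
   which is the required 64-bit ST2 interleave of the low halves. */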
4250
4251 /* Helper function -- get doubling and narrowing operations. */
4252 static
4253 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4254 /*OUT*/IROp* halver,
4255 UInt laneSzBlg2 )
4256 {
4257 switch (laneSzBlg2) {
4258 case 2:
4259 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4260 break;
4261 case 1:
4262 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4263 break;
4264 case 0:
4265 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4266 break;
4267 default:
4268 vassert(0);
4269 }
4270 }
4271
4272 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4273 static
4274 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4275 UInt laneSzBlg2, IRTemp u0 )
4276 {
4277 assign(*i0, mkexpr(u0));
4278 }
4279
4280
4281 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4282 static
4283 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4284 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4285 {
4286 if (laneSzBlg2 == 3) {
4287 // 1x64, degenerate case
4288 assign(*i0, EX(u0));
4289 assign(*i1, EX(u1));
4290 return;
4291 }
4292
4293 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4294 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4295 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4296
4297 IRTemp du0 = newTempV128();
4298 IRTemp du1 = newTempV128();
4299 assign(du0, binop(doubler, EX(u0), EX(u0)));
4300 assign(du1, binop(doubler, EX(u1), EX(u1)));
4301 IRTemp di0 = newTempV128();
4302 IRTemp di1 = newTempV128();
4303 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4304 assign(*i0, binop(halver, EX(di0), EX(di0)));
4305 assign(*i1, binop(halver, EX(di1), EX(di1)));
4306 }
4307
4308
4309 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4310 static
4311 void math_INTERLEAVE3_64(
4312 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4313 UInt laneSzBlg2,
4314 IRTemp u0, IRTemp u1, IRTemp u2 )
4315 {
4316 if (laneSzBlg2 == 3) {
4317 // 1x64, degenerate case
4318 assign(*i0, EX(u0));
4319 assign(*i1, EX(u1));
4320 assign(*i2, EX(u2));
4321 return;
4322 }
4323
4324 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4325 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4326 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4327
4328 IRTemp du0 = newTempV128();
4329 IRTemp du1 = newTempV128();
4330 IRTemp du2 = newTempV128();
4331 assign(du0, binop(doubler, EX(u0), EX(u0)));
4332 assign(du1, binop(doubler, EX(u1), EX(u1)));
4333 assign(du2, binop(doubler, EX(u2), EX(u2)));
4334 IRTemp di0 = newTempV128();
4335 IRTemp di1 = newTempV128();
4336 IRTemp di2 = newTempV128();
4337 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4338 assign(*i0, binop(halver, EX(di0), EX(di0)));
4339 assign(*i1, binop(halver, EX(di1), EX(di1)));
4340 assign(*i2, binop(halver, EX(di2), EX(di2)));
4341 }
4342
4343
4344 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4345 static
4346 void math_INTERLEAVE4_64(
4347 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4348 UInt laneSzBlg2,
4349 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4350 {
4351 if (laneSzBlg2 == 3) {
4352 // 1x64, degenerate case
4353 assign(*i0, EX(u0));
4354 assign(*i1, EX(u1));
4355 assign(*i2, EX(u2));
4356 assign(*i3, EX(u3));
4357 return;
4358 }
4359
4360 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4361 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4362 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4363
4364 IRTemp du0 = newTempV128();
4365 IRTemp du1 = newTempV128();
4366 IRTemp du2 = newTempV128();
4367 IRTemp du3 = newTempV128();
4368 assign(du0, binop(doubler, EX(u0), EX(u0)));
4369 assign(du1, binop(doubler, EX(u1), EX(u1)));
4370 assign(du2, binop(doubler, EX(u2), EX(u2)));
4371 assign(du3, binop(doubler, EX(u3), EX(u3)));
4372 IRTemp di0 = newTempV128();
4373 IRTemp di1 = newTempV128();
4374 IRTemp di2 = newTempV128();
4375 IRTemp di3 = newTempV128();
4376 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4377 laneSzBlg2 + 1, du0, du1, du2, du3);
4378 assign(*i0, binop(halver, EX(di0), EX(di0)));
4379 assign(*i1, binop(halver, EX(di1), EX(di1)));
4380 assign(*i2, binop(halver, EX(di2), EX(di2)));
4381 assign(*i3, binop(halver, EX(di3), EX(di3)));
4382 }
4383
4384
4385 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4386 static
4387 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4388 UInt laneSzBlg2, IRTemp i0 )
4389 {
4390 assign(*u0, mkexpr(i0));
4391 }
4392
4393
4394 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4395 static
4396 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4397 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4398 {
4399 if (laneSzBlg2 == 3) {
4400 // 1x64, degenerate case
4401 assign(*u0, EX(i0));
4402 assign(*u1, EX(i1));
4403 return;
4404 }
4405
4406 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4407 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4408 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4409
4410 IRTemp di0 = newTempV128();
4411 IRTemp di1 = newTempV128();
4412 assign(di0, binop(doubler, EX(i0), EX(i0)));
4413 assign(di1, binop(doubler, EX(i1), EX(i1)));
4414
4415 IRTemp du0 = newTempV128();
4416 IRTemp du1 = newTempV128();
4417 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4418 assign(*u0, binop(halver, EX(du0), EX(du0)));
4419 assign(*u1, binop(halver, EX(du1), EX(du1)));
4420 }
4421
4422
4423 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4424 static
4425 void math_DEINTERLEAVE3_64(
4426 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4427 UInt laneSzBlg2,
4428 IRTemp i0, IRTemp i1, IRTemp i2 )
4429 {
4430 if (laneSzBlg2 == 3) {
4431 // 1x64, degenerate case
4432 assign(*u0, EX(i0));
4433 assign(*u1, EX(i1));
4434 assign(*u2, EX(i2));
4435 return;
4436 }
4437
4438 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4439 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4440 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4441
4442 IRTemp di0 = newTempV128();
4443 IRTemp di1 = newTempV128();
4444 IRTemp di2 = newTempV128();
4445 assign(di0, binop(doubler, EX(i0), EX(i0)));
4446 assign(di1, binop(doubler, EX(i1), EX(i1)));
4447 assign(di2, binop(doubler, EX(i2), EX(i2)));
4448 IRTemp du0 = newTempV128();
4449 IRTemp du1 = newTempV128();
4450 IRTemp du2 = newTempV128();
4451 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4452 assign(*u0, binop(halver, EX(du0), EX(du0)));
4453 assign(*u1, binop(halver, EX(du1), EX(du1)));
4454 assign(*u2, binop(halver, EX(du2), EX(du2)));
4455 }
4456
4457
4458 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4459 static
4460 void math_DEINTERLEAVE4_64(
4461 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4462 UInt laneSzBlg2,
4463 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4464 {
4465 if (laneSzBlg2 == 3) {
4466 // 1x64, degenerate case
4467 assign(*u0, EX(i0));
4468 assign(*u1, EX(i1));
4469 assign(*u2, EX(i2));
4470 assign(*u3, EX(i3));
4471 return;
4472 }
4473
4474 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4475 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4476 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4477
4478 IRTemp di0 = newTempV128();
4479 IRTemp di1 = newTempV128();
4480 IRTemp di2 = newTempV128();
4481 IRTemp di3 = newTempV128();
4482 assign(di0, binop(doubler, EX(i0), EX(i0)));
4483 assign(di1, binop(doubler, EX(i1), EX(i1)));
4484 assign(di2, binop(doubler, EX(i2), EX(i2)));
4485 assign(di3, binop(doubler, EX(i3), EX(i3)));
4486 IRTemp du0 = newTempV128();
4487 IRTemp du1 = newTempV128();
4488 IRTemp du2 = newTempV128();
4489 IRTemp du3 = newTempV128();
4490 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4491 laneSzBlg2 + 1, di0, di1, di2, di3);
4492 assign(*u0, binop(halver, EX(du0), EX(du0)));
4493 assign(*u1, binop(halver, EX(du1), EX(du1)));
4494 assign(*u2, binop(halver, EX(du2), EX(du2)));
4495 assign(*u3, binop(halver, EX(du3), EX(du3)));
4496 }
4497
4498
4499 #undef EX
4500 #undef SL
4501 #undef ROR
4502 #undef ROL
4503 #undef SHR
4504 #undef SHL
4505 #undef ILO64x2
4506 #undef IHI64x2
4507 #undef ILO32x4
4508 #undef IHI32x4
4509 #undef ILO16x8
4510 #undef IHI16x8
4511 #undef ILO8x16
4512 #undef IHI8x16
4513 #undef CEV32x4
4514 #undef COD32x4
4515 #undef COD16x8
4516 #undef COD8x16
4517 #undef CEV8x16
4518 #undef AND
4519 #undef OR2
4520 #undef OR3
4521 #undef OR4
4522
4523
4524 /*------------------------------------------------------------*/
4525 /*--- Load and Store instructions ---*/
4526 /*------------------------------------------------------------*/
4527
4528 /* Generate the EA for a "reg + reg" style amode. This is done from
4529 parts of the insn, but for sanity checking sake it takes the whole
4530 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4531 and S=insn[12]:
4532
4533 The possible forms, along with their opt:S values, are:
4534 011:0 Xn|SP + Xm
4535 111:0 Xn|SP + Xm
4536 011:1 Xn|SP + Xm * transfer_szB
4537 111:1 Xn|SP + Xm * transfer_szB
4538 010:0 Xn|SP + 32Uto64(Wm)
4539 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4540 110:0 Xn|SP + 32Sto64(Wm)
4541 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4542
4543 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4544 the transfer size is insn[23,31,30]. For integer loads/stores,
4545 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4546
4547 If the decoding fails, it returns IRTemp_INVALID.
4548
4549 isInt is True iff this decoding is for transfers to/from integer
4550 registers. If False it is for transfers to/from vector registers.
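
   Example: LDR Xt, [Xn, Xm, LSL #3] has opt:S = 011:1 and szLg2 = 3,
   so the returned EA is Xn + (Xm << 3).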
4551 */
4552 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4553 {
4554 UInt optS = SLICE_UInt(insn, 15, 12);
4555 UInt mm = SLICE_UInt(insn, 20, 16);
4556 UInt nn = SLICE_UInt(insn, 9, 5);
4557 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4558 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4559
4560 buf[0] = 0;
4561
4562 /* Sanity checks, that this really is a load/store insn. */
4563 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4564 goto fail;
4565
4566 if (isInt
4567 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4568 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4569 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4570 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4571 goto fail;
4572
4573 if (!isInt
4574 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4575 goto fail;
4576
4577 /* Throw out non-verified but possibly valid cases. */
4578 switch (szLg2) {
4579 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4580 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4581 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4582 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4583 case BITS3(1,0,0): // can only ever be valid for the vector case
4584 if (isInt) goto fail; else break;
4585 case BITS3(1,0,1): // these sizes are never valid
4586 case BITS3(1,1,0):
4587 case BITS3(1,1,1): goto fail;
4588
4589 default: vassert(0);
4590 }
4591
4592 IRExpr* rhs = NULL;
4593 switch (optS) {
4594 case BITS4(1,1,1,0): goto fail; //ATC
4595 case BITS4(0,1,1,0):
4596 rhs = getIReg64orZR(mm);
4597 vex_sprintf(buf, "[%s, %s]",
4598 nameIReg64orZR(nn), nameIReg64orZR(mm));
4599 break;
4600 case BITS4(1,1,1,1): goto fail; //ATC
4601 case BITS4(0,1,1,1):
4602 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4603 vex_sprintf(buf, "[%s, %s lsl %u]",
4604 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4605 break;
4606 case BITS4(0,1,0,0):
4607 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4608 vex_sprintf(buf, "[%s, %s uxtx]",
4609 nameIReg64orZR(nn), nameIReg32orZR(mm));
4610 break;
4611 case BITS4(0,1,0,1):
4612 rhs = binop(Iop_Shl64,
4613 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4614 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4615 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4616 break;
4617 case BITS4(1,1,0,0):
4618 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4619 vex_sprintf(buf, "[%s, %s sxtx]",
4620 nameIReg64orZR(nn), nameIReg32orZR(mm));
4621 break;
4622 case BITS4(1,1,0,1):
4623 rhs = binop(Iop_Shl64,
4624 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4625 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4626 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4627 break;
4628 default:
4629 /* The rest appear to be genuinely invalid */
4630 goto fail;
4631 }
4632
4633 vassert(rhs);
4634 IRTemp res = newTemp(Ity_I64);
4635 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4636 return res;
4637
4638 fail:
4639 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4640 return IRTemp_INVALID;
4641 }
4642
4643
4644 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4645 bits of DATAE :: Ity_I64. */
4646 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4647 {
4648 IRExpr* addrE = mkexpr(addr);
4649 switch (szB) {
4650 case 8:
4651 storeLE(addrE, dataE);
4652 break;
4653 case 4:
4654 storeLE(addrE, unop(Iop_64to32, dataE));
4655 break;
4656 case 2:
4657 storeLE(addrE, unop(Iop_64to16, dataE));
4658 break;
4659 case 1:
4660 storeLE(addrE, unop(Iop_64to8, dataE));
4661 break;
4662 default:
4663 vassert(0);
4664 }
4665 }
4666
4667
4668 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4669 placing the result in an Ity_I64 temporary. */
4670 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4671 {
4672 IRTemp res = newTemp(Ity_I64);
4673 IRExpr* addrE = mkexpr(addr);
4674 switch (szB) {
4675 case 8:
4676 assign(res, loadLE(Ity_I64,addrE));
4677 break;
4678 case 4:
4679 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4680 break;
4681 case 2:
4682 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4683 break;
4684 case 1:
4685 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4686 break;
4687 default:
4688 vassert(0);
4689 }
4690 return res;
4691 }
4692
4693
4694 /* Generate a "standard 7" name, from bitQ and size. But also
4695 allow ".1d" since that's occasionally useful. */
4696 static
4697 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4698 {
4699 vassert(bitQ <= 1 && size <= 3);
4700 const HChar* nms[8]
4701 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4702 UInt ix = (bitQ << 2) | size;
4703 vassert(ix < 8);
4704 return nms[ix];
4705 }
4706
4707
4708 static
4709 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
4710 const VexAbiInfo* abiinfo
4711 )
4712 {
4713 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4714
4715 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4716 /* uimm12 is scaled by the transfer size
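      (for example, a 64-bit LDR with imm12 = 3 addresses [Xn|SP, #24])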
4717
4718 31 29 26 21 9 4
4719 | | | | | |
4720 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4721 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4722
4723 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4724 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4725
4726 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4727 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4728
4729 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4730 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4731 */
4732 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4733 UInt szLg2 = INSN(31,30);
4734 UInt szB = 1 << szLg2;
4735 Bool isLD = INSN(22,22) == 1;
4736 UInt offs = INSN(21,10) * szB;
4737 UInt nn = INSN(9,5);
4738 UInt tt = INSN(4,0);
4739 IRTemp ta = newTemp(Ity_I64);
4740 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4741 if (nn == 31) { /* FIXME generate stack alignment check */ }
4742 vassert(szLg2 < 4);
4743 if (isLD) {
4744 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4745 } else {
4746 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4747 }
4748 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4749 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4750 DIP("%s %s, [%s, #%u]\n",
4751 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4752 nameIReg64orSP(nn), offs);
4753 return True;
4754 }
4755
4756 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4757 /*
4758 31 29 26 20 11 9 4
4759 | | | | | | |
4760 (at-Rn-then-Rn=EA) | | |
4761 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4762 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4763
4764 (at-EA-then-Rn=EA)
4765 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4766 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4767
4768 (at-EA)
4769 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4770 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4771
4772 simm9 is unscaled.
4773
4774 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4775 load case this is because it would create two competing values for
4776 Rt. In the store case the reason is unclear, but the spec
4777 disallows it anyway.
4778
4779 Stores are narrowing, loads are unsigned widening. sz encodes
4780 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
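
      For example imm9 = 0x1F0 sign-extends to -16, so the pre-index
      form with sz = 11 encodes STR Xt, [Xn|SP, #-16]!.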
4781 */
4782 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4783 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4784 UInt szLg2 = INSN(31,30);
4785 UInt szB = 1 << szLg2;
4786 Bool isLoad = INSN(22,22) == 1;
4787 UInt imm9 = INSN(20,12);
4788 UInt nn = INSN(9,5);
4789 UInt tt = INSN(4,0);
4790 Bool wBack = INSN(10,10) == 1;
4791 UInt how = INSN(11,10);
4792 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4793 /* undecodable; fall through */
4794 } else {
4795 if (nn == 31) { /* FIXME generate stack alignment check */ }
4796
4797 // Compute the transfer address TA and the writeback address WA.
4798 IRTemp tRN = newTemp(Ity_I64);
4799 assign(tRN, getIReg64orSP(nn));
4800 IRTemp tEA = newTemp(Ity_I64);
4801 Long simm9 = (Long)sx_to_64(imm9, 9);
4802 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4803
4804 IRTemp tTA = newTemp(Ity_I64);
4805 IRTemp tWA = newTemp(Ity_I64);
4806 switch (how) {
4807 case BITS2(0,1):
4808 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4809 case BITS2(1,1):
4810 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4811 case BITS2(0,0):
4812 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4813 default:
4814 vassert(0); /* NOTREACHED */
4815 }
4816
4817 /* Normally rN would be updated after the transfer. However, in
4818 the special case typified by
4819 str x30, [sp,#-16]!
4820 it is necessary to update SP before the transfer, (1)
4821 because Memcheck will otherwise complain about a write
4822 below the stack pointer, and (2) because the segfault
4823 stack extension mechanism will otherwise extend the stack
4824 only down to SP before the instruction, which might not be
4825 far enough, if the -16 bit takes the actual access
4826 address to the next page.
4827 */
4828 Bool earlyWBack
4829 = wBack && simm9 < 0 && szB == 8
4830 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4831
4832 if (wBack && earlyWBack)
4833 putIReg64orSP(nn, mkexpr(tEA));
4834
4835 if (isLoad) {
4836 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4837 } else {
4838 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4839 }
4840
4841 if (wBack && !earlyWBack)
4842 putIReg64orSP(nn, mkexpr(tEA));
4843
4844 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4845 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4846 const HChar* fmt_str = NULL;
4847 switch (how) {
4848 case BITS2(0,1):
4849 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4850 break;
4851 case BITS2(1,1):
4852 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4853 break;
4854 case BITS2(0,0):
4855 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4856 break;
4857 default:
4858 vassert(0);
4859 }
4860 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4861 nameIRegOrZR(szB == 8, tt),
4862 nameIReg64orSP(nn), simm9);
4863 return True;
4864 }
4865 }
4866
4867 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4868 /* L==1 => mm==LD
4869 L==0 => mm==ST
4870 x==0 => 32 bit transfers, and zero extended loads
4871 x==1 => 64 bit transfers
4872 simm7 is scaled by the (single-register) transfer size
4873
4874 (at-Rn-then-Rn=EA)
4875 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4876
4877 (at-EA-then-Rn=EA)
4878 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4879
4880 (at-EA)
4881 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4882 */
4883 UInt insn_30_23 = INSN(30,23);
4884 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4885 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4886 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4887 UInt bL = INSN(22,22);
4888 UInt bX = INSN(31,31);
4889 UInt bWBack = INSN(23,23);
4890 UInt rT1 = INSN(4,0);
4891 UInt rN = INSN(9,5);
4892 UInt rT2 = INSN(14,10);
4893 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4894 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4895 || (bL && rT1 == rT2)) {
4896 /* undecodable; fall through */
4897 } else {
4898 if (rN == 31) { /* FIXME generate stack alignment check */ }
4899
4900 // Compute the transfer address TA and the writeback address WA.
4901 IRTemp tRN = newTemp(Ity_I64);
4902 assign(tRN, getIReg64orSP(rN));
4903 IRTemp tEA = newTemp(Ity_I64);
4904 simm7 = (bX ? 8 : 4) * simm7;
4905 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4906
4907 IRTemp tTA = newTemp(Ity_I64);
4908 IRTemp tWA = newTemp(Ity_I64);
4909 switch (INSN(24,23)) {
4910 case BITS2(0,1):
4911 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4912 case BITS2(1,1):
4913 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4914 case BITS2(1,0):
4915 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4916 default:
4917 vassert(0); /* NOTREACHED */
4918 }
4919
4920 /* Normally rN would be updated after the transfer. However, in
4921 the special case typified by
4922 stp x29, x30, [sp,#-112]!
4923 it is necessary to update SP before the transfer, (1)
4924 because Memcheck will otherwise complain about a write
4925 below the stack pointer, and (2) because the segfault
4926 stack extension mechanism will otherwise extend the stack
4927 only down to SP before the instruction, which might not be
4928 far enough, if the -112 offset takes the actual access
4929 address to the next page.
4930 */
4931 Bool earlyWBack
4932 = bWBack && simm7 < 0
4933 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4934
4935 if (bWBack && earlyWBack)
4936 putIReg64orSP(rN, mkexpr(tEA));
4937
4938 /**/ if (bL == 1 && bX == 1) {
4939 // 64 bit load
4940 putIReg64orZR(rT1, loadLE(Ity_I64,
4941 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4942 putIReg64orZR(rT2, loadLE(Ity_I64,
4943 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4944 } else if (bL == 1 && bX == 0) {
4945 // 32 bit load
4946 putIReg32orZR(rT1, loadLE(Ity_I32,
4947 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4948 putIReg32orZR(rT2, loadLE(Ity_I32,
4949 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4950 } else if (bL == 0 && bX == 1) {
4951 // 64 bit store
4952 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4953 getIReg64orZR(rT1));
4954 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4955 getIReg64orZR(rT2));
4956 } else {
4957 vassert(bL == 0 && bX == 0);
4958 // 32 bit store
4959 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4960 getIReg32orZR(rT1));
4961 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4962 getIReg32orZR(rT2));
4963 }
4964
4965 if (bWBack && !earlyWBack)
4966 putIReg64orSP(rN, mkexpr(tEA));
4967
4968 const HChar* fmt_str = NULL;
4969 switch (INSN(24,23)) {
4970 case BITS2(0,1):
4971 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4972 break;
4973 case BITS2(1,1):
4974 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4975 break;
4976 case BITS2(1,0):
4977 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4978 break;
4979 default:
4980 vassert(0);
4981 }
4982 DIP(fmt_str, bL == 0 ? "st" : "ld",
4983 nameIRegOrZR(bX == 1, rT1),
4984 nameIRegOrZR(bX == 1, rT2),
4985 nameIReg64orSP(rN), simm7);
4986 return True;
4987 }
4988 }
4989
4990 /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
4991 /* Does 32 bit transfers which are sign extended to 64 bits.
4992 simm7 is scaled by the (single-register) transfer size
4993
4994 (at-Rn-then-Rn=EA)
4995 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
4996
4997 (at-EA-then-Rn=EA)
4998 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
4999
5000 (at-EA)
5001 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
5002 */
5003 UInt insn_31_22 = INSN(31,22);
5004 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
5005 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5006 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5007 UInt bWBack = INSN(23,23);
5008 UInt rT1 = INSN(4,0);
5009 UInt rN = INSN(9,5);
5010 UInt rT2 = INSN(14,10);
5011 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5012 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5013 || (rT1 == rT2)) {
5014 /* undecodable; fall through */
5015 } else {
5016 if (rN == 31) { /* FIXME generate stack alignment check */ }
5017
5018 // Compute the transfer address TA and the writeback address WA.
5019 IRTemp tRN = newTemp(Ity_I64);
5020 assign(tRN, getIReg64orSP(rN));
5021 IRTemp tEA = newTemp(Ity_I64);
5022 simm7 = 4 * simm7;
5023 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5024
5025 IRTemp tTA = newTemp(Ity_I64);
5026 IRTemp tWA = newTemp(Ity_I64);
5027 switch (INSN(24,23)) {
5028 case BITS2(0,1):
5029 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5030 case BITS2(1,1):
5031 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5032 case BITS2(1,0):
5033 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5034 default:
5035 vassert(0); /* NOTREACHED */
5036 }
5037
5038 // 32 bit load, sign extended to 64 bits
5039 putIReg64orZR(rT1, unop(Iop_32Sto64,
5040 loadLE(Ity_I32, binop(Iop_Add64,
5041 mkexpr(tTA),
5042 mkU64(0)))));
5043 putIReg64orZR(rT2, unop(Iop_32Sto64,
5044 loadLE(Ity_I32, binop(Iop_Add64,
5045 mkexpr(tTA),
5046 mkU64(4)))));
5047 if (bWBack)
5048 putIReg64orSP(rN, mkexpr(tEA));
5049
5050 const HChar* fmt_str = NULL;
5051 switch (INSN(24,23)) {
5052 case BITS2(0,1):
5053 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5054 break;
5055 case BITS2(1,1):
5056 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5057 break;
5058 case BITS2(1,0):
5059 fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-Rn)\n";
5060 break;
5061 default:
5062 vassert(0);
5063 }
5064 DIP(fmt_str, nameIReg64orZR(rT1),
5065 nameIReg64orZR(rT2),
5066 nameIReg64orSP(rN), simm7);
5067 return True;
5068 }
5069 }
5070
5071 /* ---------------- LDR (literal, int reg) ---------------- */
5072 /* 31 29 23 4
5073 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5074 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5075 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5076 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5077 Just handles the first two cases for now.
5078 */
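/* The offset is signed: imm19 == 0x7FFFF, for instance, gives a byte
   offset of -4, i.e. a load from the preceding instruction's
   address. */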
5079 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5080 UInt imm19 = INSN(23,5);
5081 UInt rT = INSN(4,0);
5082 UInt bX = INSN(30,30);
5083 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5084 if (bX) {
5085 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5086 } else {
5087 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5088 }
5089 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5090 return True;
5091 }
5092
5093 /* -------------- {LD,ST}R (integer register) --------------- */
5094 /* 31 29 20 15 12 11 9 4
5095 | | | | | | | |
5096 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5097 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5098 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5099 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5100
5101 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5102 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5103 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5104 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5105 */
5106 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5107 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5108 HChar dis_buf[64];
5109 UInt szLg2 = INSN(31,30);
5110 Bool isLD = INSN(22,22) == 1;
5111 UInt tt = INSN(4,0);
5112 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5113 if (ea != IRTemp_INVALID) {
5114 switch (szLg2) {
5115 case 3: /* 64 bit */
5116 if (isLD) {
5117 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5118 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5119 } else {
5120 storeLE(mkexpr(ea), getIReg64orZR(tt));
5121 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5122 }
5123 break;
5124 case 2: /* 32 bit */
5125 if (isLD) {
5126 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5127 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5128 } else {
5129 storeLE(mkexpr(ea), getIReg32orZR(tt));
5130 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5131 }
5132 break;
5133 case 1: /* 16 bit */
5134 if (isLD) {
5135 putIReg64orZR(tt, unop(Iop_16Uto64,
5136 loadLE(Ity_I16, mkexpr(ea))));
5137 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5138 } else {
5139 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5140 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5141 }
5142 break;
5143 case 0: /* 8 bit */
5144 if (isLD) {
5145 putIReg64orZR(tt, unop(Iop_8Uto64,
5146 loadLE(Ity_I8, mkexpr(ea))));
5147 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5148 } else {
5149 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5150 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5151 }
5152 break;
5153 default:
5154 vassert(0);
5155 }
5156 return True;
5157 }
5158 }
5159
5160 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5161 /* 31 29 26 23 21 9 4
5162 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5163 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5164 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5165 where
5166 Rt is Wt when x==1, Xt when x==0
5167 */
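/* For example, ldrsw x0, [x1, #40] would be encoded with imm12 == 10,
   the byte offset divided by the 4-byte transfer size. */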
5168 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5169 /* Further checks on bits 31:30 and 22 */
5170 Bool valid = False;
5171 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5172 case BITS3(1,0,0):
5173 case BITS3(0,1,0): case BITS3(0,1,1):
5174 case BITS3(0,0,0): case BITS3(0,0,1):
5175 valid = True;
5176 break;
5177 }
5178 if (valid) {
5179 UInt szLg2 = INSN(31,30);
5180 UInt bitX = INSN(22,22);
5181 UInt imm12 = INSN(21,10);
5182 UInt nn = INSN(9,5);
5183 UInt tt = INSN(4,0);
5184 UInt szB = 1 << szLg2;
5185 IRExpr* ea = binop(Iop_Add64,
5186 getIReg64orSP(nn), mkU64(imm12 * szB));
5187 switch (szB) {
5188 case 4:
5189 vassert(bitX == 0);
5190 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5191 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5192 nameIReg64orSP(nn), imm12 * szB);
5193 break;
5194 case 2:
5195 if (bitX == 1) {
5196 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5197 } else {
5198 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5199 }
5200 DIP("ldrsh %s, [%s, #%u]\n",
5201 nameIRegOrZR(bitX == 0, tt),
5202 nameIReg64orSP(nn), imm12 * szB);
5203 break;
5204 case 1:
5205 if (bitX == 1) {
5206 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5207 } else {
5208 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5209 }
5210 DIP("ldrsb %s, [%s, #%u]\n",
5211 nameIRegOrZR(bitX == 0, tt),
5212 nameIReg64orSP(nn), imm12 * szB);
5213 break;
5214 default:
5215 vassert(0);
5216 }
5217 return True;
5218 }
5219 /* else fall through */
5220 }
5221
5222 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5223 /* (at-Rn-then-Rn=EA)
5224 31 29 23 21 20 11 9 4
5225 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5226 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5227 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5228
5229 (at-EA-then-Rn=EA)
5230 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5231 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5232 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5233 where
5234 Rt is Wt when x==1, Xt when x==0
5235 transfer-at-Rn when [11]==0, at EA when [11]==1
5236 */
5237 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5238 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5239 /* Further checks on bits 31:30 and 22 */
5240 Bool valid = False;
5241 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5242 case BITS3(1,0,0): // LDRSW Xt
5243 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5244 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5245 valid = True;
5246 break;
5247 }
5248 if (valid) {
5249 UInt szLg2 = INSN(31,30);
5250 UInt imm9 = INSN(20,12);
5251 Bool atRN = INSN(11,11) == 0;
5252 UInt nn = INSN(9,5);
5253 UInt tt = INSN(4,0);
5254 IRTemp tRN = newTemp(Ity_I64);
5255 IRTemp tEA = newTemp(Ity_I64);
5256 IRTemp tTA = IRTemp_INVALID;
5257 ULong simm9 = sx_to_64(imm9, 9);
5258 Bool is64 = INSN(22,22) == 0;
5259 assign(tRN, getIReg64orSP(nn));
5260 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5261 tTA = atRN ? tRN : tEA;
5262 HChar ch = '?';
5263 /* There are 5 cases:
5264 byte load, SX to 64
5265 byte load, SX to 32, ZX to 64
5266 halfword load, SX to 64
5267 halfword load, SX to 32, ZX to 64
5268 word load, SX to 64
5269 The ifs below handle them in the listed order.
5270 */
5271 if (szLg2 == 0) {
5272 ch = 'b';
5273 if (is64) {
5274 putIReg64orZR(tt, unop(Iop_8Sto64,
5275 loadLE(Ity_I8, mkexpr(tTA))));
5276 } else {
5277 putIReg32orZR(tt, unop(Iop_8Sto32,
5278 loadLE(Ity_I8, mkexpr(tTA))));
5279 }
5280 }
5281 else if (szLg2 == 1) {
5282 ch = 'h';
5283 if (is64) {
5284 putIReg64orZR(tt, unop(Iop_16Sto64,
5285 loadLE(Ity_I16, mkexpr(tTA))));
5286 } else {
5287 putIReg32orZR(tt, unop(Iop_16Sto32,
5288 loadLE(Ity_I16, mkexpr(tTA))));
5289 }
5290 }
5291 else if (szLg2 == 2 && is64) {
5292 ch = 'w';
5293 putIReg64orZR(tt, unop(Iop_32Sto64,
5294 loadLE(Ity_I32, mkexpr(tTA))));
5295 }
5296 else {
5297 vassert(0);
5298 }
5299 putIReg64orSP(nn, mkexpr(tEA));
5300 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5301 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5302 return True;
5303 }
5304 /* else fall through */
5305 }
5306
5307 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5308 /* 31 29 23 21 20 11 9 4
5309 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5310 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5311 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5312 where
5313 Rt is Wt when x==1, Xt when x==0
5314 */
5315 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5316 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5317 /* Further checks on bits 31:30 and 22 */
5318 Bool valid = False;
5319 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5320 case BITS3(1,0,0): // LDURSW Xt
5321 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5322 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5323 valid = True;
5324 break;
5325 }
5326 if (valid) {
5327 UInt szLg2 = INSN(31,30);
5328 UInt imm9 = INSN(20,12);
5329 UInt nn = INSN(9,5);
5330 UInt tt = INSN(4,0);
5331 IRTemp tRN = newTemp(Ity_I64);
5332 IRTemp tEA = newTemp(Ity_I64);
5333 ULong simm9 = sx_to_64(imm9, 9);
5334 Bool is64 = INSN(22,22) == 0;
5335 assign(tRN, getIReg64orSP(nn));
5336 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5337 HChar ch = '?';
5338 /* There are 5 cases:
5339 byte load, SX to 64
5340 byte load, SX to 32, ZX to 64
5341 halfword load, SX to 64
5342 halfword load, SX to 32, ZX to 64
5343 word load, SX to 64
5344 The ifs below handle them in the listed order.
5345 */
5346 if (szLg2 == 0) {
5347 ch = 'b';
5348 if (is64) {
5349 putIReg64orZR(tt, unop(Iop_8Sto64,
5350 loadLE(Ity_I8, mkexpr(tEA))));
5351 } else {
5352 putIReg32orZR(tt, unop(Iop_8Sto32,
5353 loadLE(Ity_I8, mkexpr(tEA))));
5354 }
5355 }
5356 else if (szLg2 == 1) {
5357 ch = 'h';
5358 if (is64) {
5359 putIReg64orZR(tt, unop(Iop_16Sto64,
5360 loadLE(Ity_I16, mkexpr(tEA))));
5361 } else {
5362 putIReg32orZR(tt, unop(Iop_16Sto32,
5363 loadLE(Ity_I16, mkexpr(tEA))));
5364 }
5365 }
5366 else if (szLg2 == 2 && is64) {
5367 ch = 'w';
5368 putIReg64orZR(tt, unop(Iop_32Sto64,
5369 loadLE(Ity_I32, mkexpr(tEA))));
5370 }
5371 else {
5372 vassert(0);
5373 }
5374 DIP("ldurs%c %s, [%s, #%lld]\n",
5375 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5376 return True;
5377 }
5378 /* else fall through */
5379 }
5380
5381 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5382 /* L==1 => mm==LD
5383 L==0 => mm==ST
5384 sz==00 => 32 bit (S) transfers
5385 sz==01 => 64 bit (D) transfers
5386 sz==10 => 128 bit (Q) transfers
5387 sz==11 isn't allowed
5388 simm7 is scaled by the (single-register) transfer size
5389
5390 31 29 26 22 21 14 9 4
5391
5392 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5393 (at-EA, with nontemporal hint)
5394
5395 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5396 (at-Rn-then-Rn=EA)
5397
5398 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5399 (at-EA)
5400
5401 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5402 (at-EA-then-Rn=EA)
5403 */
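/* Scaling illustration: stp q0, q1, [sp, #-512]! (the case discussed
   in the earlyWBack comment further down) has a 16-byte per-register
   size, so imm7 encodes -32 and is scaled back up to -512 below. */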
5404 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5405 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5406 Bool isLD = INSN(22,22) == 1;
5407 Bool wBack = INSN(23,23) == 1;
5408 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5409 UInt tt2 = INSN(14,10);
5410 UInt nn = INSN(9,5);
5411 UInt tt1 = INSN(4,0);
5412 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5413 /* undecodable; fall through */
5414 } else {
5415 if (nn == 31) { /* FIXME generate stack alignment check */ }
5416
5417 // Compute the transfer address TA and the writeback address WA.
5418 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5419 IRTemp tRN = newTemp(Ity_I64);
5420 assign(tRN, getIReg64orSP(nn));
5421 IRTemp tEA = newTemp(Ity_I64);
5422 simm7 = szB * simm7;
5423 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5424
5425 IRTemp tTA = newTemp(Ity_I64);
5426 IRTemp tWA = newTemp(Ity_I64);
5427 switch (INSN(24,23)) {
5428 case BITS2(0,1):
5429 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5430 case BITS2(1,1):
5431 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5432 case BITS2(1,0):
5433 case BITS2(0,0):
5434 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5435 default:
5436 vassert(0); /* NOTREACHED */
5437 }
5438
5439 IRType ty = Ity_INVALID;
5440 switch (szB) {
5441 case 4: ty = Ity_F32; break;
5442 case 8: ty = Ity_F64; break;
5443 case 16: ty = Ity_V128; break;
5444 default: vassert(0);
5445 }
5446
5447 /* Normally rN would be updated after the transfer. However, in
5448 the special cases typified by
5449 stp q0, q1, [sp,#-512]!
5450 stp d0, d1, [sp,#-512]!
5451 stp s0, s1, [sp,#-512]!
5452 it is necessary to update SP before the transfer, (1)
5453 because Memcheck will otherwise complain about a write
5454 below the stack pointer, and (2) because the segfault
5455 stack extension mechanism will otherwise extend the stack
5456 only down to SP before the instruction, which might not be
5457 far enough, if the -512 offset takes the actual access
5458 address to the next page.
5459 */
5460 Bool earlyWBack
5461 = wBack && simm7 < 0
5462 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5463
5464 if (wBack && earlyWBack)
5465 putIReg64orSP(nn, mkexpr(tEA));
5466
5467 if (isLD) {
5468 if (szB < 16) {
5469 putQReg128(tt1, mkV128(0x0000));
5470 }
5471 putQRegLO(tt1,
5472 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5473 if (szB < 16) {
5474 putQReg128(tt2, mkV128(0x0000));
5475 }
5476 putQRegLO(tt2,
5477 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5478 } else {
5479 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5480 getQRegLO(tt1, ty));
5481 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5482 getQRegLO(tt2, ty));
5483 }
5484
5485 if (wBack && !earlyWBack)
5486 putIReg64orSP(nn, mkexpr(tEA));
5487
5488 const HChar* fmt_str = NULL;
5489 switch (INSN(24,23)) {
5490 case BITS2(0,1):
5491 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5492 break;
5493 case BITS2(1,1):
5494 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5495 break;
5496 case BITS2(1,0):
5497 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5498 break;
5499 case BITS2(0,0):
5500 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5501 break;
5502 default:
5503 vassert(0);
5504 }
5505 DIP(fmt_str, isLD ? "ld" : "st",
5506 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5507 nameIReg64orSP(nn), simm7);
5508 return True;
5509 }
5510 }
5511
5512 /* -------------- {LD,ST}R (vector register) --------------- */
5513 /* 31 29 23 20 15 12 11 9 4
5514 | | | | | | | | |
5515 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5516 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5517 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5518 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5519 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5520
5521 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5522 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5523 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5524 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5525 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5526 */
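/* The size is bit 23 concatenated with bits 31:30, giving szLg2
   values 0..4 for B, H, S, D and Q respectively; values 5..7 are
   rejected below. */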
5527 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5528 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5529 HChar dis_buf[64];
5530 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5531 Bool isLD = INSN(22,22) == 1;
5532 UInt tt = INSN(4,0);
5533 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5534 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5535 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5536 switch (szLg2) {
5537 case 0: /* 8 bit */
5538 if (isLD) {
5539 putQReg128(tt, mkV128(0x0000));
5540 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5541 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5542 } else {
5543 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5544 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5545 }
5546 break;
5547 case 1:
5548 if (isLD) {
5549 putQReg128(tt, mkV128(0x0000));
5550 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5551 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5552 } else {
5553 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5554 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5555 }
5556 break;
5557 case 2: /* 32 bit */
5558 if (isLD) {
5559 putQReg128(tt, mkV128(0x0000));
5560 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5561 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5562 } else {
5563 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5564 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5565 }
5566 break;
5567 case 3: /* 64 bit */
5568 if (isLD) {
5569 putQReg128(tt, mkV128(0x0000));
5570 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5571 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5572 } else {
5573 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5574 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5575 }
5576 break;
5577 case 4:
5578 if (isLD) {
5579 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5580 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5581 } else {
5582 storeLE(mkexpr(ea), getQReg128(tt));
5583 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5584 }
5585 break;
5586 default:
5587 vassert(0);
5588 }
5589 return True;
5590 }
5591 after_LDR_STR_vector_register:
5592
5593 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5594 /* 31 29 22 20 15 12 11 9 4
5595 | | | | | | | | |
5596 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5597
5598 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5599 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5600
5601 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5602 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5603 */
5604 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5605 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5606 HChar dis_buf[64];
5607 UInt szLg2 = INSN(31,30);
5608 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5609 UInt tt = INSN(4,0);
5610 if (szLg2 == 3) goto after_LDRS_integer_register;
5611 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5612 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5613 /* Enumerate the 5 variants explicitly. */
5614 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5615 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5616 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5617 return True;
5618 }
5619 else
5620 if (szLg2 == 1/*16 bit*/) {
5621 if (sxTo64) {
5622 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5623 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5624 } else {
5625 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5626 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5627 }
5628 return True;
5629 }
5630 else
5631 if (szLg2 == 0/*8 bit*/) {
5632 if (sxTo64) {
5633 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5634 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5635 } else {
5636 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5637 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5638 }
5639 return True;
5640 }
5641 /* else it's an invalid combination */
5642 }
5643 after_LDRS_integer_register:
5644
5645 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5646 /* This is the Unsigned offset variant only. The Post-Index and
5647 Pre-Index variants are below.
5648
5649 31 29 23 21 9 4
5650 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5651 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5652 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5653 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5654 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5655
5656 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5657 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5658 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5659 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5660 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5661 */
5662 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5663 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5664 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5665 Bool isLD = INSN(22,22) == 1;
5666 UInt pimm12 = INSN(21,10) << szLg2;
5667 UInt nn = INSN(9,5);
5668 UInt tt = INSN(4,0);
5669 IRTemp tEA = newTemp(Ity_I64);
5670 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5671 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5672 if (isLD) {
5673 if (szLg2 < 4) {
5674 putQReg128(tt, mkV128(0x0000));
5675 }
5676 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5677 } else {
5678 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5679 }
5680 DIP("%s %s, [%s, #%u]\n",
5681 isLD ? "ldr" : "str",
5682 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5683 return True;
5684 }
5685
5686 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5687 /* These are the Post-Index and Pre-Index variants.
5688
5689 31 29 23 20 11 9 4
5690 (at-Rn-then-Rn=EA)
5691 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5692 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5693 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5694 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5695 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5696
5697 (at-EA-then-Rn=EA)
5698 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5699 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5700 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5701 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5702 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5703
5704 Stores are the same except with bit 22 set to 0.
5705 */
5706 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5707 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5708 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5709 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5710 Bool isLD = INSN(22,22) == 1;
5711 UInt imm9 = INSN(20,12);
5712 Bool atRN = INSN(11,11) == 0;
5713 UInt nn = INSN(9,5);
5714 UInt tt = INSN(4,0);
5715 IRTemp tRN = newTemp(Ity_I64);
5716 IRTemp tEA = newTemp(Ity_I64);
5717 IRTemp tTA = IRTemp_INVALID;
5718 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5719 ULong simm9 = sx_to_64(imm9, 9);
5720 assign(tRN, getIReg64orSP(nn));
5721 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5722 tTA = atRN ? tRN : tEA;
5723 if (isLD) {
5724 if (szLg2 < 4) {
5725 putQReg128(tt, mkV128(0x0000));
5726 }
5727 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5728 } else {
5729 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5730 }
5731 putIReg64orSP(nn, mkexpr(tEA));
5732 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5733 isLD ? "ldr" : "str",
5734 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5735 return True;
5736 }
5737
5738 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5739 /* 31 29 23 20 11 9 4
5740 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5741 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5742 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5743 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5744 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5745
5746 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5747 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5748 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5749 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5750 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5751 */
5752 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5753 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5754 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5755 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5756 Bool isLD = INSN(22,22) == 1;
5757 UInt imm9 = INSN(20,12);
5758 UInt nn = INSN(9,5);
5759 UInt tt = INSN(4,0);
5760 ULong simm9 = sx_to_64(imm9, 9);
5761 IRTemp tEA = newTemp(Ity_I64);
5762 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5763 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5764 if (isLD) {
5765 if (szLg2 < 4) {
5766 putQReg128(tt, mkV128(0x0000));
5767 }
5768 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5769 } else {
5770 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5771 }
5772 DIP("%s %s, [%s, #%lld]\n",
5773 isLD ? "ldur" : "stur",
5774 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5775 return True;
5776 }
5777
5778 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5779 /* 31 29 23 4
5780 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5781 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5782 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5783 */
5784 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5785 UInt szB = 4 << INSN(31,30);
5786 UInt imm19 = INSN(23,5);
5787 UInt tt = INSN(4,0);
5788 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5789 IRType ty = preferredVectorSubTypeFromSize(szB);
5790 putQReg128(tt, mkV128(0x0000));
5791 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5792 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5793 return True;
5794 }
5795
5796 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg) ------ */
5797 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
5798 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
5799 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
5800 /* 31 29 26 22 21 20 15 11 9 4
5801
5802 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5803 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5804
5805 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5806 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5807
5808 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5809 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5810
5811 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5812 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5813
5814 T = defined by Q and sz in the normal way
5815 step = if m == 11111 then transfer-size else Xm
5816 xx = case L of 1 -> LD ; 0 -> ST
5817 */
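/* T follows the usual arrangement naming: sz picks B/H/S/D and q
   picks the 64- or 128-bit form, so e.g. q=1, sz=10 is .4s and
   q=0, sz=11 is .1d (which is only accepted for the one-register
   case below). */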
5818 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5819 && INSN(21,21) == 0) {
5820 Bool bitQ = INSN(30,30);
5821 Bool isPX = INSN(23,23) == 1;
5822 Bool isLD = INSN(22,22) == 1;
5823 UInt mm = INSN(20,16);
5824 UInt opc = INSN(15,12);
5825 UInt sz = INSN(11,10);
5826 UInt nn = INSN(9,5);
5827 UInt tt = INSN(4,0);
5828 Bool isQ = bitQ == 1;
5829 Bool is1d = sz == BITS2(1,1) && !isQ;
5830 UInt nRegs = 0;
5831 switch (opc) {
5832 case BITS4(0,0,0,0): nRegs = 4; break;
5833 case BITS4(0,1,0,0): nRegs = 3; break;
5834 case BITS4(1,0,0,0): nRegs = 2; break;
5835 case BITS4(0,1,1,1): nRegs = 1; break;
5836 default: break;
5837 }
5838
5839 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5840 If we see it, set nRegs to 0 so as to cause the next conditional
5841 to fail. */
5842 if (!isPX && mm != 0)
5843 nRegs = 0;
5844
5845 if (nRegs == 1 /* .1d is allowed */
5846 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5847
5848 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5849
5850 /* Generate the transfer address (TA) and if necessary the
5851 writeback address (WB) */
5852 IRTemp tTA = newTemp(Ity_I64);
5853 assign(tTA, getIReg64orSP(nn));
5854 if (nn == 31) { /* FIXME generate stack alignment check */ }
5855 IRTemp tWB = IRTemp_INVALID;
5856 if (isPX) {
5857 tWB = newTemp(Ity_I64);
5858 assign(tWB, binop(Iop_Add64,
5859 mkexpr(tTA),
5860 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5861 : getIReg64orZR(mm)));
5862 }
5863
5864 /* -- BEGIN generate the transfers -- */
5865
5866 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5867 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5868 switch (nRegs) {
5869 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5870 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5871 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5872 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5873 default: vassert(0);
5874 }
5875
5876 /* -- Multiple 128 or 64 bit stores -- */
5877 if (!isLD) {
5878 switch (nRegs) {
5879 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5880 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5881 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5882 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5883 default: vassert(0);
5884 }
5885 switch (nRegs) {
5886 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5887 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5888 break;
5889 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5890 (&i0, &i1, &i2, sz, u0, u1, u2);
5891 break;
5892 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5893 (&i0, &i1, sz, u0, u1);
5894 break;
5895 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5896 (&i0, sz, u0);
5897 break;
5898 default: vassert(0);
5899 }
5900 # define MAYBE_NARROW_TO_64(_expr) \
5901 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5902 UInt step = isQ ? 16 : 8;
5903 switch (nRegs) {
5904 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5905 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5906 /* fallthru */
5907 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5908 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5909 /* fallthru */
5910 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5911 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5912 /* fallthru */
5913 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5914 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5915 break;
5916 default: vassert(0);
5917 }
5918 # undef MAYBE_NARROW_TO_64
5919 }
5920
5921 /* -- Multiple 128 or 64 bit loads -- */
5922 else /* isLD */ {
5923 UInt step = isQ ? 16 : 8;
5924 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5925 # define MAYBE_WIDEN_FROM_64(_expr) \
5926 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5927 switch (nRegs) {
5928 case 4:
5929 assign(i3, MAYBE_WIDEN_FROM_64(
5930 loadLE(loadTy,
5931 binop(Iop_Add64, mkexpr(tTA),
5932 mkU64(3 * step)))));
5933 /* fallthru */
5934 case 3:
5935 assign(i2, MAYBE_WIDEN_FROM_64(
5936 loadLE(loadTy,
5937 binop(Iop_Add64, mkexpr(tTA),
5938 mkU64(2 * step)))));
5939 /* fallthru */
5940 case 2:
5941 assign(i1, MAYBE_WIDEN_FROM_64(
5942 loadLE(loadTy,
5943 binop(Iop_Add64, mkexpr(tTA),
5944 mkU64(1 * step)))));
5945 /* fallthru */
5946 case 1:
5947 assign(i0, MAYBE_WIDEN_FROM_64(
5948 loadLE(loadTy,
5949 binop(Iop_Add64, mkexpr(tTA),
5950 mkU64(0 * step)))));
5951 break;
5952 default:
5953 vassert(0);
5954 }
5955 # undef MAYBE_WIDEN_FROM_64
5956 switch (nRegs) {
5957 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5958 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5959 break;
5960 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5961 (&u0, &u1, &u2, sz, i0, i1, i2);
5962 break;
5963 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5964 (&u0, &u1, sz, i0, i1);
5965 break;
5966 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5967 (&u0, sz, i0);
5968 break;
5969 default: vassert(0);
5970 }
5971 switch (nRegs) {
5972 case 4: putQReg128( (tt+3) % 32,
5973 math_MAYBE_ZERO_HI64(bitQ, u3));
5974 /* fallthru */
5975 case 3: putQReg128( (tt+2) % 32,
5976 math_MAYBE_ZERO_HI64(bitQ, u2));
5977 /* fallthru */
5978 case 2: putQReg128( (tt+1) % 32,
5979 math_MAYBE_ZERO_HI64(bitQ, u1));
5980 /* fallthru */
5981 case 1: putQReg128( (tt+0) % 32,
5982 math_MAYBE_ZERO_HI64(bitQ, u0));
5983 break;
5984 default: vassert(0);
5985 }
5986 }
5987
5988 /* -- END generate the transfers -- */
5989
5990 /* Do the writeback, if necessary */
5991 if (isPX) {
5992 putIReg64orSP(nn, mkexpr(tWB));
5993 }
5994
5995 HChar pxStr[20];
5996 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5997 if (isPX) {
5998 if (mm == BITS5(1,1,1,1,1))
5999 vex_sprintf(pxStr, ", #%u", xferSzB);
6000 else
6001 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6002 }
6003 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6004 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
6005 isLD ? "ld" : "st", nRegs,
6006 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6007 pxStr);
6008
6009 return True;
6010 }
6011 /* else fall through */
6012 }
6013
6014 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
6015 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
6016 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
6017 /* 31 29 26 22 21 20 15 11 9 4
6018
6019 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6020 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6021
6022 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6023 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6024
6025 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6026 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6027
6028 T = defined by Q and sz in the normal way
6029 step = if m == 11111 then transfer-size else Xm
6030 xx = case L of 1 -> LD ; 0 -> ST
6031 */
6032 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6033 && INSN(21,21) == 0) {
6034 Bool bitQ = INSN(30,30);
6035 Bool isPX = INSN(23,23) == 1;
6036 Bool isLD = INSN(22,22) == 1;
6037 UInt mm = INSN(20,16);
6038 UInt opc = INSN(15,12);
6039 UInt sz = INSN(11,10);
6040 UInt nn = INSN(9,5);
6041 UInt tt = INSN(4,0);
6042 Bool isQ = bitQ == 1;
6043 UInt nRegs = 0;
6044 switch (opc) {
6045 case BITS4(0,0,1,0): nRegs = 4; break;
6046 case BITS4(0,1,1,0): nRegs = 3; break;
6047 case BITS4(1,0,1,0): nRegs = 2; break;
6048 default: break;
6049 }
6050
6051 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6052 If we see it, set nRegs to 0 so as to cause the next conditional
6053 to fail. */
6054 if (!isPX && mm != 0)
6055 nRegs = 0;
6056
6057 if (nRegs >= 2 && nRegs <= 4) {
6058
6059 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6060
6061 /* Generate the transfer address (TA) and if necessary the
6062 writeback address (WB) */
6063 IRTemp tTA = newTemp(Ity_I64);
6064 assign(tTA, getIReg64orSP(nn));
6065 if (nn == 31) { /* FIXME generate stack alignment check */ }
6066 IRTemp tWB = IRTemp_INVALID;
6067 if (isPX) {
6068 tWB = newTemp(Ity_I64);
6069 assign(tWB, binop(Iop_Add64,
6070 mkexpr(tTA),
6071 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6072 : getIReg64orZR(mm)));
6073 }
6074
6075 /* -- BEGIN generate the transfers -- */
6076
6077 IRTemp u0, u1, u2, u3;
6078 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6079 switch (nRegs) {
6080 case 4: u3 = newTempV128(); /* fallthru */
6081 case 3: u2 = newTempV128(); /* fallthru */
6082 case 2: u1 = newTempV128();
6083 u0 = newTempV128(); break;
6084 default: vassert(0);
6085 }
6086
6087 /* -- Multiple 128 or 64 bit stores -- */
6088 if (!isLD) {
6089 switch (nRegs) {
6090 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6091 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6092 case 2: assign(u1, getQReg128((tt+1) % 32));
6093 assign(u0, getQReg128((tt+0) % 32)); break;
6094 default: vassert(0);
6095 }
6096 # define MAYBE_NARROW_TO_64(_expr) \
6097 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6098 UInt step = isQ ? 16 : 8;
6099 switch (nRegs) {
6100 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6101 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6102 /* fallthru */
6103 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6104 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6105 /* fallthru */
6106 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6107 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6108 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6109 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6110 break;
6111 default: vassert(0);
6112 }
6113 # undef MAYBE_NARROW_TO_64
6114 }
6115
6116 /* -- Multiple 128 or 64 bit loads -- */
6117 else /* isLD */ {
6118 UInt step = isQ ? 16 : 8;
6119 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6120 # define MAYBE_WIDEN_FROM_64(_expr) \
6121 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6122 switch (nRegs) {
6123 case 4:
6124 assign(u3, MAYBE_WIDEN_FROM_64(
6125 loadLE(loadTy,
6126 binop(Iop_Add64, mkexpr(tTA),
6127 mkU64(3 * step)))));
6128 /* fallthru */
6129 case 3:
6130 assign(u2, MAYBE_WIDEN_FROM_64(
6131 loadLE(loadTy,
6132 binop(Iop_Add64, mkexpr(tTA),
6133 mkU64(2 * step)))));
6134 /* fallthru */
6135 case 2:
6136 assign(u1, MAYBE_WIDEN_FROM_64(
6137 loadLE(loadTy,
6138 binop(Iop_Add64, mkexpr(tTA),
6139 mkU64(1 * step)))));
6140 assign(u0, MAYBE_WIDEN_FROM_64(
6141 loadLE(loadTy,
6142 binop(Iop_Add64, mkexpr(tTA),
6143 mkU64(0 * step)))));
6144 break;
6145 default:
6146 vassert(0);
6147 }
6148 # undef MAYBE_WIDEN_FROM_64
6149 switch (nRegs) {
6150 case 4: putQReg128( (tt+3) % 32,
6151 math_MAYBE_ZERO_HI64(bitQ, u3));
6152 /* fallthru */
6153 case 3: putQReg128( (tt+2) % 32,
6154 math_MAYBE_ZERO_HI64(bitQ, u2));
6155 /* fallthru */
6156 case 2: putQReg128( (tt+1) % 32,
6157 math_MAYBE_ZERO_HI64(bitQ, u1));
6158 putQReg128( (tt+0) % 32,
6159 math_MAYBE_ZERO_HI64(bitQ, u0));
6160 break;
6161 default: vassert(0);
6162 }
6163 }
6164
6165 /* -- END generate the transfers -- */
6166
6167 /* Do the writeback, if necessary */
6168 if (isPX) {
6169 putIReg64orSP(nn, mkexpr(tWB));
6170 }
6171
6172 HChar pxStr[20];
6173 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6174 if (isPX) {
6175 if (mm == BITS5(1,1,1,1,1))
6176 vex_sprintf(pxStr, ", #%u", xferSzB);
6177 else
6178 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6179 }
6180 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6181 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6182 isLD ? "ld" : "st",
6183 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6184 pxStr);
6185
6186 return True;
6187 }
6188 /* else fall through */
6189 }
6190
6191 /* ---------- LD1R (single structure, replicate) ---------- */
6192 /* ---------- LD2R (single structure, replicate) ---------- */
6193 /* ---------- LD3R (single structure, replicate) ---------- */
6194 /* ---------- LD4R (single structure, replicate) ---------- */
6195 /* 31 29 22 20 15 11 9 4
6196 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6197 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6198
6199 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6200 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6201
6202 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6203 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6204
6205 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6206 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6207
6208 step = if m == 11111 then transfer-size else Xm
6209 */
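/* The register count is split across the encoding: bit 13 (the 110
   vs 111 opcode distinction above) is the high bit and bit 21 the
   low bit of nRegs-1. */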
6210 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6211 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6212 && INSN(12,12) == 0) {
6213 UInt bitQ = INSN(30,30);
6214 Bool isPX = INSN(23,23) == 1;
6215 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6216 UInt mm = INSN(20,16);
6217 UInt sz = INSN(11,10);
6218 UInt nn = INSN(9,5);
6219 UInt tt = INSN(4,0);
6220
6221 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6222 if (isPX || mm == 0) {
6223
6224 IRType ty = integerIRTypeOfSize(1 << sz);
6225
6226 UInt laneSzB = 1 << sz;
6227 UInt xferSzB = laneSzB * nRegs;
6228
6229 /* Generate the transfer address (TA) and if necessary the
6230 writeback address (WB) */
6231 IRTemp tTA = newTemp(Ity_I64);
6232 assign(tTA, getIReg64orSP(nn));
6233 if (nn == 31) { /* FIXME generate stack alignment check */ }
6234 IRTemp tWB = IRTemp_INVALID;
6235 if (isPX) {
6236 tWB = newTemp(Ity_I64);
6237 assign(tWB, binop(Iop_Add64,
6238 mkexpr(tTA),
6239 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6240 : getIReg64orZR(mm)));
6241 }
6242
6243 /* Do the writeback, if necessary */
6244 if (isPX) {
6245 putIReg64orSP(nn, mkexpr(tWB));
6246 }
6247
6248 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6249 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6250 switch (nRegs) {
6251 case 4:
6252 e3 = newTemp(ty);
6253 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6254 mkU64(3 * laneSzB))));
6255 v3 = math_DUP_TO_V128(e3, ty);
6256 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6257 /* fallthrough */
6258 case 3:
6259 e2 = newTemp(ty);
6260 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6261 mkU64(2 * laneSzB))));
6262 v2 = math_DUP_TO_V128(e2, ty);
6263 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6264 /* fallthrough */
6265 case 2:
6266 e1 = newTemp(ty);
6267 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6268 mkU64(1 * laneSzB))));
6269 v1 = math_DUP_TO_V128(e1, ty);
6270 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6271 /* fallthrough */
6272 case 1:
6273 e0 = newTemp(ty);
6274 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6275 mkU64(0 * laneSzB))));
6276 v0 = math_DUP_TO_V128(e0, ty);
6277 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6278 break;
6279 default:
6280 vassert(0);
6281 }
6282
6283 HChar pxStr[20];
6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6285 if (isPX) {
6286 if (mm == BITS5(1,1,1,1,1))
6287 vex_sprintf(pxStr, ", #%u", xferSzB);
6288 else
6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6290 }
6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6292 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6293 nRegs,
6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6295 pxStr);
6296
6297 return True;
6298 }
6299 /* else fall through */
6300 }
6301
6302 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6303 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6304 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6305 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6306 /* 31 29 22 21 20 15 11 9 4
6307 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6308 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6309
6310 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6311 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6312
6313 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6314 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6315
6316 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6317 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6318
6319 step = if m == 11111 then transfer-size else Xm
6320 op = case L of 1 -> LD ; 0 -> ST
6321
6322 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6323 01:b:b:b0 -> 2, bbb
6324 10:b:b:00 -> 4, bb
6325 10:b:0:01 -> 8, b
6326 */
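/* Reading the table: for, say, ld1 {v3.b}[11], [x0] the lane size is
   1 byte (xx == 00) and ix is the 4-bit value q:S:sz, here 1:0:11,
   i.e. lane 11. */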
6327 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6328 UInt bitQ = INSN(30,30);
6329 Bool isPX = INSN(23,23) == 1;
6330 Bool isLD = INSN(22,22) == 1;
6331 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6332 UInt mm = INSN(20,16);
6333 UInt xx = INSN(15,14);
6334 UInt bitS = INSN(12,12);
6335 UInt sz = INSN(11,10);
6336 UInt nn = INSN(9,5);
6337 UInt tt = INSN(4,0);
6338
6339 Bool valid = True;
6340
6341 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6342 if (!isPX && mm != 0)
6343 valid = False;
6344
6345 UInt laneSzB = 0; /* invalid */
6346 UInt ix = 16; /* invalid */
6347
6348 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6349 switch (xx_q_S_sz) {
6350 case 0x00: case 0x01: case 0x02: case 0x03:
6351 case 0x04: case 0x05: case 0x06: case 0x07:
6352 case 0x08: case 0x09: case 0x0A: case 0x0B:
6353 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6354 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6355 break;
6356 case 0x10: case 0x12: case 0x14: case 0x16:
6357 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6358 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6359 break;
6360 case 0x20: case 0x24: case 0x28: case 0x2C:
6361 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6362 break;
6363 case 0x21: case 0x29:
6364 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6365 break;
6366 default:
6367 break;
6368 }
6369
6370 if (valid && laneSzB != 0) {
6371
6372 IRType ty = integerIRTypeOfSize(laneSzB);
6373 UInt xferSzB = laneSzB * nRegs;
6374
6375 /* Generate the transfer address (TA) and if necessary the
6376 writeback address (WB) */
6377 IRTemp tTA = newTemp(Ity_I64);
6378 assign(tTA, getIReg64orSP(nn));
6379 if (nn == 31) { /* FIXME generate stack alignment check */ }
6380 IRTemp tWB = IRTemp_INVALID;
6381 if (isPX) {
6382 tWB = newTemp(Ity_I64);
6383 assign(tWB, binop(Iop_Add64,
6384 mkexpr(tTA),
6385 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6386 : getIReg64orZR(mm)));
6387 }
6388
6389 /* Do the writeback, if necessary */
6390 if (isPX) {
6391 putIReg64orSP(nn, mkexpr(tWB));
6392 }
6393
6394 switch (nRegs) {
6395 case 4: {
6396 IRExpr* addr
6397 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6398 if (isLD) {
6399 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6400 } else {
6401 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6402 }
6403 /* fallthrough */
6404 }
6405 case 3: {
6406 IRExpr* addr
6407 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6408 if (isLD) {
6409 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6410 } else {
6411 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6412 }
6413 /* fallthrough */
6414 }
6415 case 2: {
6416 IRExpr* addr
6417 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6418 if (isLD) {
6419 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6420 } else {
6421 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6422 }
6423 /* fallthrough */
6424 }
6425 case 1: {
6426 IRExpr* addr
6427 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6428 if (isLD) {
6429 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6430 } else {
6431 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6432 }
6433 break;
6434 }
6435 default:
6436 vassert(0);
6437 }
6438
6439 HChar pxStr[20];
6440 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6441 if (isPX) {
6442 if (mm == BITS5(1,1,1,1,1))
6443 vex_sprintf(pxStr, ", #%u", xferSzB);
6444 else
6445 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6446 }
6447 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6448 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6449 isLD ? "ld" : "st", nRegs,
6450 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6451 ix, nameIReg64orSP(nn), pxStr);
6452
6453 return True;
6454 }
6455 /* else fall through */
6456 }
6457
6458 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6459 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6460 /* 31 29 23 20 14 9 4
6461 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6462 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6463 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6464 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6465 */
6466 /* For the "standard" implementation we pass through the LL and SC to
6467 the host. For the "fallback" implementation, for details see
6468 https://bugs.kde.org/show_bug.cgi?id=344524 and
6469 https://bugs.kde.org/show_bug.cgi?id=369459,
6470 but in short:
6471
6472 LoadLinked(addr)
6473 gs.LLsize = load_size // 1, 2, 4 or 8
6474 gs.LLaddr = addr
6475 gs.LLdata = zeroExtend(*addr)
6476
6477 StoreCond(addr, data)
6478 tmp_LLsize = gs.LLsize
6479 gs.LLsize = 0 // "no transaction"
6480 if tmp_LLsize != store_size -> fail
6481 if addr != gs.LLaddr -> fail
6482 if zeroExtend(*addr) != gs.LLdata -> fail
6483 cas_ok = CAS(store_size, addr, gs.LLdata -> data)
6484 if !cas_ok -> fail
6485 succeed
6486
6487 When thread scheduled
6488 gs.LLsize = 0 // "no transaction"
6489 (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
6490 has to do this bit)
6491 */
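/* In the fallback SC case below this is implemented as: preset Ws to
   1 (failure), then a chain of guarded side-exits to the next
   instruction for the size, address and data checks, then a real
   CAS, and finally Ws is set to 0 only if the CAS's 'old' value
   matches the expected data. */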
6492 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6493 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6494 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6495 UInt szBlg2 = INSN(31,30);
6496 Bool isLD = INSN(22,22) == 1;
6497 Bool isAcqOrRel = INSN(15,15) == 1;
6498 UInt ss = INSN(20,16);
6499 UInt nn = INSN(9,5);
6500 UInt tt = INSN(4,0);
6501
6502 vassert(szBlg2 < 4);
6503 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6504 IRType ty = integerIRTypeOfSize(szB);
6505 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6506
6507 IRTemp ea = newTemp(Ity_I64);
6508 assign(ea, getIReg64orSP(nn));
6509 /* FIXME generate check that ea is szB-aligned */
6510
6511 if (isLD && ss == BITS5(1,1,1,1,1)) {
6512 IRTemp res = newTemp(ty);
6513 if (abiinfo->guest__use_fallback_LLSC) {
6514 // Do the load first so we don't update any guest state
6515 // if it faults.
6516 IRTemp loaded_data64 = newTemp(Ity_I64);
6517 assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
6518 stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
6519 stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
6520 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
6521 putIReg64orZR(tt, mkexpr(loaded_data64));
6522 } else {
6523 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6524 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6525 }
6526 if (isAcqOrRel) {
6527 stmt(IRStmt_MBE(Imbe_Fence));
6528 }
6529 DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6530 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6531 abiinfo->guest__use_fallback_LLSC
6532 ? "(fallback implementation)" : "");
6533 return True;
6534 }
6535 if (!isLD) {
6536 if (isAcqOrRel) {
6537 stmt(IRStmt_MBE(Imbe_Fence));
6538 }
6539 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6540 if (abiinfo->guest__use_fallback_LLSC) {
6541 // This is really ugly, since we don't have any way to do
6542 // proper if-then-else. First, set up as if the SC failed,
6543 // and jump forwards if it really has failed.
6544
6545 // Continuation address
6546 IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
6547
6548 // "the SC failed". Any non-zero value means failure.
6549 putIReg64orZR(ss, mkU64(1));
6550
6551 IRTemp tmp_LLsize = newTemp(Ity_I64);
6552 assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
6553 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
6554 ));
6555 // Fail if no or wrong-size transaction
6556 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
6557 stmt( IRStmt_Exit(
6558 binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
6559 Ijk_Boring, nia, OFFB_PC
6560 ));
6561 // Fail if the address doesn't match the LL address
6562 stmt( IRStmt_Exit(
6563 binop(Iop_CmpNE64, mkexpr(ea),
6564 IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
6565 Ijk_Boring, nia, OFFB_PC
6566 ));
6567 // Fail if the data doesn't match the LL data
6568 IRTemp llsc_data64 = newTemp(Ity_I64);
6569 assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
6570 stmt( IRStmt_Exit(
6571 binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
6572 mkexpr(llsc_data64)),
6573 Ijk_Boring, nia, OFFB_PC
6574 ));
6575 // Try to CAS the new value in.
6576 IRTemp old = newTemp(ty);
6577 IRTemp expd = newTemp(ty);
6578 assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
6579 stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
6580 Iend_LE, mkexpr(ea),
6581 /*expdHi*/NULL, mkexpr(expd),
6582 /*dataHi*/NULL, data
6583 )));
6584 // Fail if the CAS failed (viz, old != expd)
6585 stmt( IRStmt_Exit(
6586 binop(Iop_CmpNE64,
6587 widenUto64(ty, mkexpr(old)),
6588 widenUto64(ty, mkexpr(expd))),
6589 Ijk_Boring, nia, OFFB_PC
6590 ));
6591 // Otherwise we succeeded (!)
6592 putIReg64orZR(ss, mkU64(0));
6593 } else {
6594 IRTemp res = newTemp(Ity_I1);
6595 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6596 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6597 Need to set rS to 1 on failure, 0 on success. */
6598 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6599 mkU64(1)));
6600 }
6601 DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6602 nameIRegOrZR(False, ss),
6603 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
6604 abiinfo->guest__use_fallback_LLSC
6605 ? "(fallback implementation)" : "");
6606 return True;
6607 }
6608 /* else fall through */
6609 }
6610
6611 /* ------------------ LDA{R,RH,RB} ------------------ */
6612 /* ------------------ STL{R,RH,RB} ------------------ */
6613 /* 31 29 23 20 14 9 4
6614 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6615 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6616 */
6617 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6618 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6619 UInt szBlg2 = INSN(31,30);
6620 Bool isLD = INSN(22,22) == 1;
6621 UInt nn = INSN(9,5);
6622 UInt tt = INSN(4,0);
6623
6624 vassert(szBlg2 < 4);
6625 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6626 IRType ty = integerIRTypeOfSize(szB);
6627 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6628
6629 IRTemp ea = newTemp(Ity_I64);
6630 assign(ea, getIReg64orSP(nn));
6631 /* FIXME generate check that ea is szB-aligned */
6632
6633 if (isLD) {
6634 IRTemp res = newTemp(ty);
6635 assign(res, loadLE(ty, mkexpr(ea)));
6636 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6637 stmt(IRStmt_MBE(Imbe_Fence));
6638 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6639 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6640 } else {
6641 stmt(IRStmt_MBE(Imbe_Fence));
6642 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6643 storeLE(mkexpr(ea), data);
6644 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6645 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6646 }
6647 return True;
6648 }
6649
6650 /* The PRFM cases that follow possibly allow Rt values (the
6651 prefetch operation) which are not allowed by the documentation.
6652 This should be looked into. */
6653 /* ------------------ PRFM (immediate) ------------------ */
6654 /* 31 21 9 4
6655 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6656 */
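/* If the usual prfop encoding (type:target:policy packed into Rt) is
   followed, then e.g. prfm pldl1keep, [x0, #64] would show up here
   with tt == 0 and imm12 == 8. */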
6657 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6658 UInt imm12 = INSN(21,10);
6659 UInt nn = INSN(9,5);
6660 UInt tt = INSN(4,0);
6661 /* Generating any IR here is pointless, except for documentation
6662 purposes, as it will get optimised away later. */
6663 IRTemp ea = newTemp(Ity_I64);
6664 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6665 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6666 return True;
6667 }
6668
6669 /* ------------------ PRFM (register) ------------------ */
6670 /* 31 29 22 20 15 12 11 9 4
6671 11 1110001 01 Rm opt S 10 Rn Rt PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6672 */
6673 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6674 && INSN(11,10) == BITS2(1,0)) {
6675 HChar dis_buf[64];
6676 UInt tt = INSN(4,0);
6677 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6678 if (ea != IRTemp_INVALID) {
6679 /* No actual code to generate. */
6680 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6681 return True;
6682 }
6683 }
6684
6685 /* ------------------ PRFM (unscaled offset) ------------------ */
6686 /* 31 29 22 20 11 9 4
6687 11 1110001 00 imm9 00 Rn Rt PRFM prfop=Rt, [Xn|SP, #simm]
6688 */
6689 if (INSN(31,21) == BITS11(1,1, 1,1,1,0,0,0,1, 0,0)
6690 && INSN(11,10) == BITS2(0,0)) {
6691 ULong imm9 = INSN(20,12);
6692 UInt nn = INSN(9,5);
6693 UInt tt = INSN(4,0);
6694 ULong offset = sx_to_64(imm9, 9);
6695 IRTemp ea = newTemp(Ity_I64);
6696 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offset)));
6697 /* No actual code to generate. */
6698 DIP("prfum prfop=%u, [%s, #0x%llx]\n", tt, nameIReg64orSP(nn), offset);
6699 return True;
6700 }
6701
6702 vex_printf("ARM64 front end: load_store\n");
6703 return False;
6704 # undef INSN
6705 }
6706
6707
6708 /*------------------------------------------------------------*/
6709 /*--- Control flow and misc instructions ---*/
6710 /*------------------------------------------------------------*/
6711
6712 static
6713 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6714 const VexArchInfo* archinfo,
6715 const VexAbiInfo* abiinfo)
6716 {
6717 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6718
6719 /* ---------------------- B cond ----------------------- */
6720 /* 31 24 4 3
6721 0101010 0 imm19 0 cond */
6722 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6723 UInt cond = INSN(3,0);
6724 ULong uimm64 = INSN(23,5) << 2;
6725 Long simm64 = (Long)sx_to_64(uimm64, 21);
6726 vassert(dres->whatNext == Dis_Continue);
6727 vassert(dres->len == 4);
6728 vassert(dres->continueAt == 0);
6729 vassert(dres->jk_StopHere == Ijk_INVALID);
6730 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6731 Ijk_Boring,
6732 IRConst_U64(guest_PC_curr_instr + simm64),
6733 OFFB_PC) );
6734 putPC(mkU64(guest_PC_curr_instr + 4));
6735 dres->whatNext = Dis_StopHere;
6736 dres->jk_StopHere = Ijk_Boring;
6737 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6738 return True;
6739 }
6740
6741 /* -------------------- B{L} uncond -------------------- */
6742 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6743 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6744 100101 imm26 BL (PC + sxTo64(imm26 << 2))
6745 */
6746 UInt bLink = INSN(31,31);
6747 ULong uimm64 = INSN(25,0) << 2;
6748 Long simm64 = (Long)sx_to_64(uimm64, 28);
6749 if (bLink) {
6750 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6751 }
6752 putPC(mkU64(guest_PC_curr_instr + simm64));
6753 dres->whatNext = Dis_StopHere;
6754 dres->jk_StopHere = Ijk_Call;
6755 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6756 guest_PC_curr_instr + simm64);
6757 return True;
6758 }
6759
6760 /* --------------------- B{L} reg --------------------- */
6761 /* 31 24 22 20 15 9 4
6762 1101011 00 10 11111 000000 nn 00000 RET Rn
6763 1101011 00 01 11111 000000 nn 00000 CALL Rn
6764 1101011 00 00 11111 000000 nn 00000 JMP Rn
6765 */
6766 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6767 && INSN(20,16) == BITS5(1,1,1,1,1)
6768 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6769 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6770 UInt branch_type = INSN(22,21);
6771 UInt nn = INSN(9,5);
6772 if (branch_type == BITS2(1,0) /* RET */) {
6773 putPC(getIReg64orZR(nn));
6774 dres->whatNext = Dis_StopHere;
6775 dres->jk_StopHere = Ijk_Ret;
6776 DIP("ret %s\n", nameIReg64orZR(nn));
6777 return True;
6778 }
6779 if (branch_type == BITS2(0,1) /* CALL */) {
6780 IRTemp dst = newTemp(Ity_I64);
6781 assign(dst, getIReg64orZR(nn));
6782 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6783 putPC(mkexpr(dst));
6784 dres->whatNext = Dis_StopHere;
6785 dres->jk_StopHere = Ijk_Call;
6786 DIP("blr %s\n", nameIReg64orZR(nn));
6787 return True;
6788 }
6789 if (branch_type == BITS2(0,0) /* JMP */) {
6790 putPC(getIReg64orZR(nn));
6791 dres->whatNext = Dis_StopHere;
6792 dres->jk_StopHere = Ijk_Boring;
6793 DIP("jmp %s\n", nameIReg64orZR(nn));
6794 return True;
6795 }
6796 }
6797
6798 /* -------------------- CB{N}Z -------------------- */
6799 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6800 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6801 */
6802 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6803 Bool is64 = INSN(31,31) == 1;
6804 Bool bIfZ = INSN(24,24) == 0;
6805 ULong uimm64 = INSN(23,5) << 2;
6806 UInt rT = INSN(4,0);
6807 Long simm64 = (Long)sx_to_64(uimm64, 21);
6808 IRExpr* cond = NULL;
6809 if (is64) {
6810 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6811 getIReg64orZR(rT), mkU64(0));
6812 } else {
6813 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6814 getIReg32orZR(rT), mkU32(0));
6815 }
6816 stmt( IRStmt_Exit(cond,
6817 Ijk_Boring,
6818 IRConst_U64(guest_PC_curr_instr + simm64),
6819 OFFB_PC) );
6820 putPC(mkU64(guest_PC_curr_instr + 4));
6821 dres->whatNext = Dis_StopHere;
6822 dres->jk_StopHere = Ijk_Boring;
6823 DIP("cb%sz %s, 0x%llx\n",
6824 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6825 guest_PC_curr_instr + simm64);
6826 return True;
6827 }
6828
6829 /* -------------------- TB{N}Z -------------------- */
6830 /* 31 30 24 23 18 5 4
6831 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6832 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6833 */
6834 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6835 UInt b5 = INSN(31,31);
6836 Bool bIfZ = INSN(24,24) == 0;
6837 UInt b40 = INSN(23,19);
6838 UInt imm14 = INSN(18,5);
6839 UInt tt = INSN(4,0);
6840 UInt bitNo = (b5 << 5) | b40;
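/* E.g. b5 == 1 with b40 == 0b00101 selects bit 37 of Xt. */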
6841 ULong uimm64 = imm14 << 2;
6842 Long simm64 = sx_to_64(uimm64, 16);
6843 IRExpr* cond
6844 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6845 binop(Iop_And64,
6846 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6847 mkU64(1)),
6848 mkU64(0));
6849 stmt( IRStmt_Exit(cond,
6850 Ijk_Boring,
6851 IRConst_U64(guest_PC_curr_instr + simm64),
6852 OFFB_PC) );
6853 putPC(mkU64(guest_PC_curr_instr + 4));
6854 dres->whatNext = Dis_StopHere;
6855 dres->jk_StopHere = Ijk_Boring;
6856 DIP("tb%sz %s, #%u, 0x%llx\n",
6857 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6858 guest_PC_curr_instr + simm64);
6859 return True;
6860 }
6861
6862 /* -------------------- SVC -------------------- */
6863 /* 11010100 000 imm16 000 01
6864 Don't bother with anything except the imm16==0 case.
6865 */
6866 if (INSN(31,0) == 0xD4000001) {
6867 putPC(mkU64(guest_PC_curr_instr + 4));
6868 dres->whatNext = Dis_StopHere;
6869 dres->jk_StopHere = Ijk_Sys_syscall;
6870 DIP("svc #0\n");
6871 return True;
6872 }
6873
6874 /* ------------------ M{SR,RS} ------------------ */
6875 /* ---- Cases for TPIDR_EL0 ----
6876 0xD51BD0 010 Rt MSR tpidr_el0, rT
6877 0xD53BD0 010 Rt MRS rT, tpidr_el0
6878 */
6879 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6880 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6881 Bool toSys = INSN(21,21) == 0;
6882 UInt tt = INSN(4,0);
6883 if (toSys) {
6884 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6885 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6886 } else {
6887 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6888 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6889 }
6890 return True;
6891 }
6892 /* ---- Cases for FPCR ----
6893 0xD51B44 000 Rt MSR fpcr, rT
6894 0xD53B44 000 Rt MRS rT, fpcr
6895 */
6896 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6897 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6898 Bool toSys = INSN(21,21) == 0;
6899 UInt tt = INSN(4,0);
6900 if (toSys) {
6901 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6902 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6903 } else {
6904 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6905 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6906 }
6907 return True;
6908 }
6909 /* ---- Cases for FPSR ----
6910 0xD51B44 001 Rt MSR fpsr, rT
6911 0xD53B44 001 Rt MRS rT, fpsr
6912 The only part of this we model is FPSR.QC. All other bits
6913 are ignored when writing to it and RAZ when reading from it.
6914 */
6915 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6916 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6917 Bool toSys = INSN(21,21) == 0;
6918 UInt tt = INSN(4,0);
6919 if (toSys) {
6920 /* Just deal with FPSR.QC. Make up a V128 value which is
6921 zero if Xt[27] is zero and any other value if Xt[27] is
6922 nonzero. */
6923 IRTemp qc64 = newTemp(Ity_I64);
6924 assign(qc64, binop(Iop_And64,
6925 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6926 mkU64(1)));
6927 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6928 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
6929 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6930 } else {
6931 /* Generate a value which is all zeroes except for bit 27,
6932 which must be zero if QCFLAG is all zeroes and one otherwise. */
6933 IRTemp qcV128 = newTempV128();
6934 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6935 IRTemp qc64 = newTemp(Ity_I64);
6936 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6937 unop(Iop_V128to64, mkexpr(qcV128))));
6938 IRExpr* res = binop(Iop_Shl64,
6939 unop(Iop_1Uto64,
6940 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6941 mkU8(27));
6942 putIReg64orZR(tt, res);
6943 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6944 }
6945 return True;
6946 }
6947 /* ---- Cases for NZCV ----
6948 D51B42 000 Rt MSR nzcv, rT
6949 D53B42 000 Rt MRS rT, nzcv
6950 The only parts of NZCV that actually exist are bits 31:28, which
6951 are the N Z C and V bits themselves. Hence the flags thunk provides
6952 all the state we need.
6953 */
6954 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6955 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6956 Bool toSys = INSN(21,21) == 0;
6957 UInt tt = INSN(4,0);
6958 if (toSys) {
6959 IRTemp t = newTemp(Ity_I64);
6960 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6961 setFlags_COPY(t);
6962 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
6963 } else {
6964 IRTemp res = newTemp(Ity_I64);
6965 assign(res, mk_arm64g_calculate_flags_nzcv());
6966 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6967 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6968 }
6969 return True;
6970 }
6971 /* ---- Cases for DCZID_EL0 ----
6972 Don't support arbitrary reads and writes to this register. Just
6973 return the value 16, which indicates that the DC ZVA instruction
6974 is not permitted, so we don't have to emulate it.
6975 D5 3B 00 111 Rt MRS rT, dczid_el0
6976 */
6977 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6978 UInt tt = INSN(4,0);
6979 putIReg64orZR(tt, mkU64(1<<4));
6980 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6981 return True;
6982 }
6983 /* ---- Cases for CTR_EL0 ----
6984 We just handle reads, and make up a value from the D and I line
6985 sizes in the VexArchInfo we are given, and patch in the following
6986 fields that the Foundation model gives ("natively"):
6987 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6988 D5 3B 00 001 Rt MRS rT, ctr_el0
6989 */
6990 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6991 UInt tt = INSN(4,0);
6992 /* Need to generate a value from dMinLine_lg2_szB and
6993 iMinLine_lg2_szB. The value in the register is in 32-bit
6994 units, so need to subtract 2 from the values in the
6995 VexArchInfo. We can assume that the values here are valid --
6996 disInstr_ARM64 checks them -- so there's no need to deal with
6997 out-of-range cases. */
6998 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6999 && archinfo->arm64_dMinLine_lg2_szB <= 17
7000 && archinfo->arm64_iMinLine_lg2_szB >= 2
7001 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7002 UInt val
7003 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
7004 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
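/* For illustration: 64-byte D and I lines (both lg2_szB fields == 6)
give val == 0x8444c004 here. */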
7005 putIReg64orZR(tt, mkU64(val));
7006 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
7007 return True;
7008 }
7009 /* ---- Cases for CNTVCT_EL0 ----
7010 This is a timestamp counter of some sort. Support reads of it only
7011 by passing through to the host.
7012 D5 3B E0 010 Rt MRS Xt, cntvct_el0
7013 */
7014 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
7015 UInt tt = INSN(4,0);
7016 IRTemp val = newTemp(Ity_I64);
7017 IRExpr** args = mkIRExprVec_0();
7018 IRDirty* d = unsafeIRDirty_1_N (
7019 val,
7020 0/*regparms*/,
7021 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
7022 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
7023 args
7024 );
7025 /* execute the dirty call, dumping the result in val. */
7026 stmt( IRStmt_Dirty(d) );
7027 putIReg64orZR(tt, mkexpr(val));
7028 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
7029 return True;
7030 }
7031 /* ---- Cases for CNTFRQ_EL0 ----
7032 This is always RO at EL0, so it's safe to pass through to the host.
7033 D5 3B E0 000 Rt MRS Xt, cntfrq_el0
7034 */
7035 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE000) {
7036 UInt tt = INSN(4,0);
7037 IRTemp val = newTemp(Ity_I64);
7038 IRExpr** args = mkIRExprVec_0();
7039 IRDirty* d = unsafeIRDirty_1_N (
7040 val,
7041 0/*regparms*/,
7042 "arm64g_dirtyhelper_MRS_CNTFRQ_EL0",
7043 &arm64g_dirtyhelper_MRS_CNTFRQ_EL0,
7044 args
7045 );
7046 /* execute the dirty call, dumping the result in val. */
7047 stmt( IRStmt_Dirty(d) );
7048 putIReg64orZR(tt, mkexpr(val));
7049 DIP("mrs %s, cntfrq_el0\n", nameIReg64orZR(tt));
7050 return True;
7051 }
7052
7053 /* ------------------ IC_IVAU ------------------ */
7054 /* D5 0B 75 001 Rt ic ivau, rT
7055 */
7056 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
7057 /* We will always be provided with a valid iMinLine value. */
7058 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
7059 && archinfo->arm64_iMinLine_lg2_szB <= 17);
7060 /* Round the requested address, in rT, down to the start of the
7061 containing block. */
7062 UInt tt = INSN(4,0);
7063 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
7064 IRTemp addr = newTemp(Ity_I64);
7065 assign( addr, binop( Iop_And64,
7066 getIReg64orZR(tt),
7067 mkU64(~(lineszB - 1))) );
7068 /* Set the invalidation range, request exit-and-invalidate, with
7069 continuation at the next instruction. */
7070 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7071 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7072 /* be paranoid ... */
7073 stmt( IRStmt_MBE(Imbe_Fence) );
7074 putPC(mkU64( guest_PC_curr_instr + 4 ));
7075 dres->whatNext = Dis_StopHere;
7076 dres->jk_StopHere = Ijk_InvalICache;
7077 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
7078 return True;
7079 }
7080
7081 /* ------------------ DC_CVAU ------------------ */
7082 /* D5 0B 7B 001 Rt dc cvau, rT
7083 */
7084 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
7085 /* Exactly the same scheme as for IC IVAU, except we observe the
7086 dMinLine size, and request an Ijk_FlushDCache instead of
7087 Ijk_InvalICache. */
7088 /* We will always be provided with a valid dMinLine value. */
7089 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
7090 && archinfo->arm64_dMinLine_lg2_szB <= 17);
7091 /* Round the requested address, in rT, down to the start of the
7092 containing block. */
7093 UInt tt = INSN(4,0);
7094 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
7095 IRTemp addr = newTemp(Ity_I64);
7096 assign( addr, binop( Iop_And64,
7097 getIReg64orZR(tt),
7098 mkU64(~(lineszB - 1))) );
7099 /* Set the flush range, request exit-and-flush, with
7100 continuation at the next instruction. */
7101 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
7102 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
7103 /* be paranoid ... */
7104 stmt( IRStmt_MBE(Imbe_Fence) );
7105 putPC(mkU64( guest_PC_curr_instr + 4 ));
7106 dres->whatNext = Dis_StopHere;
7107 dres->jk_StopHere = Ijk_FlushDCache;
7108 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
7109 return True;
7110 }
7111
7112 /* ------------------ ISB, DMB, DSB ------------------ */
7113 /* 31 21 11 7 6 4
7114 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
7115 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
7116 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
7117 */
7118 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
7119 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
7120 && INSN(7,7) == 1
7121 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
7122 UInt opc = INSN(6,5);
7123 UInt CRm = INSN(11,8);
7124 vassert(opc <= 2 && CRm <= 15);
7125 stmt(IRStmt_MBE(Imbe_Fence));
7126 const HChar* opNames[3]
7127 = { "dsb", "dmb", "isb" };
7128 const HChar* howNames[16]
7129 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
7130 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
7131 DIP("%s %s\n", opNames[opc], howNames[CRm]);
7132 return True;
7133 }
7134
7135 /* -------------------- NOP -------------------- */
7136 if (INSN(31,0) == 0xD503201F) {
7137 DIP("nop\n");
7138 return True;
7139 }
7140
7141 /* -------------------- BRK -------------------- */
7142 /* 31 23 20 4
7143 1101 0100 001 imm16 00000 BRK #imm16
7144 */
7145 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7146 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7147 UInt imm16 = INSN(20,5);
7148 /* Request SIGTRAP and then restart of this insn. */
7149 putPC(mkU64(guest_PC_curr_instr + 0));
7150 dres->whatNext = Dis_StopHere;
7151 dres->jk_StopHere = Ijk_SigTRAP;
7152 DIP("brk #%u\n", imm16);
7153 return True;
7154 }
7155
7156 /* ------------------- YIELD ------------------- */
7157 /* 31 23 15 7
7158 1101 0101 0000 0011 0010 0000 0011 1111
7159 */
7160 if (INSN(31,0) == 0xD503203F) {
7161 /* Request yield followed by continuation at the next insn. */
7162 putPC(mkU64(guest_PC_curr_instr + 4));
7163 dres->whatNext = Dis_StopHere;
7164 dres->jk_StopHere = Ijk_Yield;
7165 DIP("yield\n");
7166 return True;
7167 }
7168
7169 /* -------------------- HINT ------------------- */
7170 /* 31 23 15 11 4 3
7171 1101 0101 0000 0011 0010 imm7 1 1111
7172 Catch otherwise unhandled HINT instructions - any
7173 like YIELD which are explicitly handled should go
7174 above this case.
7175 */
7176 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,1)
7177 && INSN(23,16) == BITS8(0,0,0,0,0,0,1,1)
7178 && INSN(15,12) == BITS4(0,0,1,0)
7179 && INSN(4,0) == BITS5(1,1,1,1,1)) {
7180 UInt imm7 = INSN(11,5);
7181 DIP("hint #%u\n", imm7);
7182 return True;
7183 }
7184
7185 /* ------------------- CLREX ------------------ */
7186 /* 31 23 15 11 7
7187 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7188 CRm is apparently ignored.
7189 */
7190 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7191 UInt mm = INSN(11,8);
7192 /* AFAICS, this simply cancels a (all?) reservations made by a
7193 (any?) preceding LDREX(es). Arrange to hand it through to
7194 the back end. */
7195 if (abiinfo->guest__use_fallback_LLSC) {
7196 stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
7197 } else {
7198 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7199 }
7200 DIP("clrex #%u\n", mm);
7201 return True;
7202 }
7203
7204 vex_printf("ARM64 front end: branch_etc\n");
7205 return False;
7206 # undef INSN
7207 }
7208
7209
7210 /*------------------------------------------------------------*/
7211 /*--- SIMD and FP instructions: helper functions ---*/
7212 /*------------------------------------------------------------*/
7213
7214 /* Some constructors for interleave/deinterleave expressions. */
7215
7216 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7217 // returns a0 b0
7218 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7219 }
7220
7221 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7222 // returns a1 b1
7223 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7224 }
7225
7226 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7227 // returns a2 a0 b2 b0
7228 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7229 }
7230
7231 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7232 // returns a3 a1 b3 b1
7233 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7234 }
7235
7236 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7237 // returns a1 b1 a0 b0
7238 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7239 }
7240
7241 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7242 // returns a3 b3 a2 b2
7243 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7244 }
7245
7246 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7247 // returns a6 a4 a2 a0 b6 b4 b2 b0
7248 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7249 }
7250
7251 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7252 // returns a7 a5 a3 a1 b7 b5 b3 b1
7253 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7254 }
7255
7256 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7257 // returns a3 b3 a2 b2 a1 b1 a0 b0
7258 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7259 }
7260
7261 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7262 // returns a7 b7 a6 b6 a5 b5 a4 b4
7263 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7264 }
7265
7266 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7267 IRTemp bFEDCBA9876543210 ) {
7268 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7269 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7270 mkexpr(bFEDCBA9876543210));
7271 }
7272
7273 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7274 IRTemp bFEDCBA9876543210 ) {
7275 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7276 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7277 mkexpr(bFEDCBA9876543210));
7278 }
7279
7280 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7281 IRTemp bFEDCBA9876543210 ) {
7282 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7283 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7284 mkexpr(bFEDCBA9876543210));
7285 }
7286
7287 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7288 IRTemp bFEDCBA9876543210 ) {
7289 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7290 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7291 mkexpr(bFEDCBA9876543210));
7292 }
7293
7294 /* Generate N copies of |bit| in the bottom of a ULong. */
7295 static ULong Replicate ( ULong bit, Int N )
7296 {
7297 vassert(bit <= 1 && N >= 1 && N < 64);
7298 if (bit == 0) {
7299 return 0;
7300 } else {
7301 /* Careful. This won't work for N == 64. */
7302 return (1ULL << N) - 1;
7303 }
7304 }
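/* E.g. Replicate(1, 5) == 0x1F and Replicate(0, N) == 0 for any N. */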
7305
7306 static ULong Replicate32x2 ( ULong bits32 )
7307 {
7308 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7309 return (bits32 << 32) | bits32;
7310 }
7311
7312 static ULong Replicate16x4 ( ULong bits16 )
7313 {
7314 vassert(0 == (bits16 & ~0xFFFFULL));
7315 return Replicate32x2((bits16 << 16) | bits16);
7316 }
7317
7318 static ULong Replicate8x8 ( ULong bits8 )
7319 {
7320 vassert(0 == (bits8 & ~0xFFULL));
7321 return Replicate16x4((bits8 << 8) | bits8);
7322 }
7323
7324 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7325 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
7326 is 64. In the former case, the upper 32 bits of the returned value
7327 are guaranteed to be zero. */
7328 static ULong VFPExpandImm ( ULong imm8, Int N )
7329 {
7330 vassert(imm8 <= 0xFF);
7331 vassert(N == 32 || N == 64);
7332 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7333 Int F = N - E - 1;
7334 ULong imm8_6 = (imm8 >> 6) & 1;
7335 /* sign: 1 bit */
7336 /* exp: E bits */
7337 /* frac: F bits */
7338 ULong sign = (imm8 >> 7) & 1;
7339 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7340 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7341 vassert(sign < (1ULL << 1));
7342 vassert(exp < (1ULL << E));
7343 vassert(frac < (1ULL << F));
7344 vassert(1 + E + F == N);
7345 ULong res = (sign << (E+F)) | (exp << F) | frac;
7346 return res;
7347 }
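/* A quick concrete check of the expansion above: imm8 == 0x70 gives
0x3FF0000000000000 for N == 64 (the F64 encoding of 1.0) and
0x3F800000 for N == 32 (1.0 as F32). */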
7348
7349 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7350 This might fail, as indicated by the returned Bool. Page 2530 of
7351 the manual. */
7352 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7353 UInt op, UInt cmode, UInt imm8 )
7354 {
7355 vassert(op <= 1);
7356 vassert(cmode <= 15);
7357 vassert(imm8 <= 255);
7358
7359 *res = 0; /* will overwrite iff returning True */
7360
7361 ULong imm64 = 0;
7362 Bool testimm8 = False;
7363
7364 switch (cmode >> 1) {
7365 case 0:
7366 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7367 case 1:
7368 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7369 case 2:
7370 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7371 case 3:
7372 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7373 case 4:
7374 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7375 case 5:
7376 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7377 case 6:
7378 testimm8 = True;
7379 if ((cmode & 1) == 0)
7380 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7381 else
7382 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7383 break;
7384 case 7:
7385 testimm8 = False;
7386 if ((cmode & 1) == 0 && op == 0)
7387 imm64 = Replicate8x8(imm8);
7388 if ((cmode & 1) == 0 && op == 1) {
7389 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7390 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7391 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7392 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7393 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7394 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7395 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7396 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7397 }
7398 if ((cmode & 1) == 1 && op == 0) {
7399 ULong imm8_7 = (imm8 >> 7) & 1;
7400 ULong imm8_6 = (imm8 >> 6) & 1;
7401 ULong imm8_50 = imm8 & 63;
7402 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7403 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7404 | (Replicate(imm8_6, 5) << (6 + 19))
7405 | (imm8_50 << 19);
7406 imm64 = Replicate32x2(imm32);
7407 }
7408 if ((cmode & 1) == 1 && op == 1) {
7409 // imm64 = imm8<7>:NOT(imm8<6>)
7410 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7411 ULong imm8_7 = (imm8 >> 7) & 1;
7412 ULong imm8_6 = (imm8 >> 6) & 1;
7413 ULong imm8_50 = imm8 & 63;
7414 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7415 | (Replicate(imm8_6, 8) << 54)
7416 | (imm8_50 << 48);
7417 }
7418 break;
7419 default:
7420 vassert(0);
7421 }
7422
7423 if (testimm8 && imm8 == 0)
7424 return False;
7425
7426 *res = imm64;
7427 return True;
7428 }
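/* For example, op == 0, cmode == 14, imm8 == 0xAB expands (via
Replicate8x8) to 0xABABABABABABABAB. For the shifted forms --
cmode >> 1 in {1,2,3,5,6} -- an imm8 of zero makes this return
False. */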
7429
7430 /* Help a bit for decoding laneage for vector operations that can be
7431 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7432 and SZ bits, typically for vector floating point. */
7433 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7434 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7435 /*OUT*/const HChar** arrSpec,
7436 Bool bitQ, Bool bitSZ )
7437 {
7438 vassert(bitQ == True || bitQ == False);
7439 vassert(bitSZ == True || bitSZ == False);
7440 if (bitQ && bitSZ) { // 2x64
7441 if (tyI) *tyI = Ity_I64;
7442 if (tyF) *tyF = Ity_F64;
7443 if (nLanes) *nLanes = 2;
7444 if (zeroUpper) *zeroUpper = False;
7445 if (arrSpec) *arrSpec = "2d";
7446 return True;
7447 }
7448 if (bitQ && !bitSZ) { // 4x32
7449 if (tyI) *tyI = Ity_I32;
7450 if (tyF) *tyF = Ity_F32;
7451 if (nLanes) *nLanes = 4;
7452 if (zeroUpper) *zeroUpper = False;
7453 if (arrSpec) *arrSpec = "4s";
7454 return True;
7455 }
7456 if (!bitQ && !bitSZ) { // 2x32
7457 if (tyI) *tyI = Ity_I32;
7458 if (tyF) *tyF = Ity_F32;
7459 if (nLanes) *nLanes = 2;
7460 if (zeroUpper) *zeroUpper = True;
7461 if (arrSpec) *arrSpec = "2s";
7462 return True;
7463 }
7464 // Else impliedly 1x64, which isn't allowed.
7465 return False;
7466 }
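/* E.g. bitQ == False, bitSZ == False selects the "2s" arrangement:
two I32/F32 lanes, with the upper 64 bits of the result to be
zeroed by the caller. */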
7467
7468 /* Helper for decoding laneage for shift-style vector operations
7469 that involve an immediate shift amount. */
7470 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7471 UInt immh, UInt immb )
7472 {
7473 vassert(immh < (1<<4));
7474 vassert(immb < (1<<3));
7475 UInt immhb = (immh << 3) | immb;
7476 if (immh & 8) {
7477 if (shift) *shift = 128 - immhb;
7478 if (szBlg2) *szBlg2 = 3;
7479 return True;
7480 }
7481 if (immh & 4) {
7482 if (shift) *shift = 64 - immhb;
7483 if (szBlg2) *szBlg2 = 2;
7484 return True;
7485 }
7486 if (immh & 2) {
7487 if (shift) *shift = 32 - immhb;
7488 if (szBlg2) *szBlg2 = 1;
7489 return True;
7490 }
7491 if (immh & 1) {
7492 if (shift) *shift = 16 - immhb;
7493 if (szBlg2) *szBlg2 = 0;
7494 return True;
7495 }
7496 return False;
7497 }
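/* E.g. immh == 0b0010, immb == 0b101 gives immhb == 21, hence
16-bit lanes (szBlg2 == 1) and shift == 32 - 21 == 11. */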
7498
7499 /* Generate IR to fold all lanes of the V128 value in 'src' as
7500 characterised by the operator 'op', and return the result in the
7501 bottom bits of a V128, with all other bits set to zero. */
7502 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7503 {
7504 /* The basic idea is to use repeated applications of Iop_CatEven*
7505 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7506 a complete vector. Then fold all those vectors with 'op' and
7507 zero out all but the least significant lane. */
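/* In the 32x4 case below, for instance, x3210 is cloned into x3333,
x2222, x1111 and x0000, those are combined pairwise with |op|, and
Iop_ZeroHI96ofV128 finally keeps just lane 0. */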
7508 switch (op) {
7509 case Iop_Min8Sx16: case Iop_Min8Ux16:
7510 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7511 /* NB: temp naming here is misleading -- the naming is for 8
7512 lanes of 16 bit, whereas what is being operated on is 16
7513 lanes of 8 bits. */
7514 IRTemp x76543210 = src;
7515 IRTemp x76547654 = newTempV128();
7516 IRTemp x32103210 = newTempV128();
7517 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7518 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7519 IRTemp x76767676 = newTempV128();
7520 IRTemp x54545454 = newTempV128();
7521 IRTemp x32323232 = newTempV128();
7522 IRTemp x10101010 = newTempV128();
7523 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7524 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7525 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7526 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7527 IRTemp x77777777 = newTempV128();
7528 IRTemp x66666666 = newTempV128();
7529 IRTemp x55555555 = newTempV128();
7530 IRTemp x44444444 = newTempV128();
7531 IRTemp x33333333 = newTempV128();
7532 IRTemp x22222222 = newTempV128();
7533 IRTemp x11111111 = newTempV128();
7534 IRTemp x00000000 = newTempV128();
7535 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7536 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7537 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7538 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7539 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7540 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7541 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7542 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7543 /* Naming not misleading after here. */
7544 IRTemp xAllF = newTempV128();
7545 IRTemp xAllE = newTempV128();
7546 IRTemp xAllD = newTempV128();
7547 IRTemp xAllC = newTempV128();
7548 IRTemp xAllB = newTempV128();
7549 IRTemp xAllA = newTempV128();
7550 IRTemp xAll9 = newTempV128();
7551 IRTemp xAll8 = newTempV128();
7552 IRTemp xAll7 = newTempV128();
7553 IRTemp xAll6 = newTempV128();
7554 IRTemp xAll5 = newTempV128();
7555 IRTemp xAll4 = newTempV128();
7556 IRTemp xAll3 = newTempV128();
7557 IRTemp xAll2 = newTempV128();
7558 IRTemp xAll1 = newTempV128();
7559 IRTemp xAll0 = newTempV128();
7560 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7561 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7562 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7563 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7564 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7565 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7566 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7567 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7568 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7569 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7570 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7571 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7572 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7573 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7574 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7575 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7576 IRTemp maxFE = newTempV128();
7577 IRTemp maxDC = newTempV128();
7578 IRTemp maxBA = newTempV128();
7579 IRTemp max98 = newTempV128();
7580 IRTemp max76 = newTempV128();
7581 IRTemp max54 = newTempV128();
7582 IRTemp max32 = newTempV128();
7583 IRTemp max10 = newTempV128();
7584 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7585 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7586 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7587 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7588 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7589 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7590 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7591 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7592 IRTemp maxFEDC = newTempV128();
7593 IRTemp maxBA98 = newTempV128();
7594 IRTemp max7654 = newTempV128();
7595 IRTemp max3210 = newTempV128();
7596 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7597 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7598 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7599 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7600 IRTemp maxFEDCBA98 = newTempV128();
7601 IRTemp max76543210 = newTempV128();
7602 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7603 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7604 IRTemp maxAllLanes = newTempV128();
7605 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7606 mkexpr(max76543210)));
7607 IRTemp res = newTempV128();
7608 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7609 return res;
7610 }
7611 case Iop_Min16Sx8: case Iop_Min16Ux8:
7612 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7613 IRTemp x76543210 = src;
7614 IRTemp x76547654 = newTempV128();
7615 IRTemp x32103210 = newTempV128();
7616 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7617 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7618 IRTemp x76767676 = newTempV128();
7619 IRTemp x54545454 = newTempV128();
7620 IRTemp x32323232 = newTempV128();
7621 IRTemp x10101010 = newTempV128();
7622 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7623 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7624 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7625 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7626 IRTemp x77777777 = newTempV128();
7627 IRTemp x66666666 = newTempV128();
7628 IRTemp x55555555 = newTempV128();
7629 IRTemp x44444444 = newTempV128();
7630 IRTemp x33333333 = newTempV128();
7631 IRTemp x22222222 = newTempV128();
7632 IRTemp x11111111 = newTempV128();
7633 IRTemp x00000000 = newTempV128();
7634 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7635 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7636 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7637 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7638 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7639 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7640 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7641 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7642 IRTemp max76 = newTempV128();
7643 IRTemp max54 = newTempV128();
7644 IRTemp max32 = newTempV128();
7645 IRTemp max10 = newTempV128();
7646 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7647 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7648 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7649 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7650 IRTemp max7654 = newTempV128();
7651 IRTemp max3210 = newTempV128();
7652 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7653 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7654 IRTemp max76543210 = newTempV128();
7655 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7656 IRTemp res = newTempV128();
7657 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7658 return res;
7659 }
7660 case Iop_Max32Fx4: case Iop_Min32Fx4:
7661 case Iop_Min32Sx4: case Iop_Min32Ux4:
7662 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7663 IRTemp x3210 = src;
7664 IRTemp x3232 = newTempV128();
7665 IRTemp x1010 = newTempV128();
7666 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7667 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7668 IRTemp x3333 = newTempV128();
7669 IRTemp x2222 = newTempV128();
7670 IRTemp x1111 = newTempV128();
7671 IRTemp x0000 = newTempV128();
7672 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7673 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7674 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7675 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7676 IRTemp max32 = newTempV128();
7677 IRTemp max10 = newTempV128();
7678 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7679 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7680 IRTemp max3210 = newTempV128();
7681 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7682 IRTemp res = newTempV128();
7683 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7684 return res;
7685 }
7686 case Iop_Add64x2: {
7687 IRTemp x10 = src;
7688 IRTemp x00 = newTempV128();
7689 IRTemp x11 = newTempV128();
7690 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7691 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7692 IRTemp max10 = newTempV128();
7693 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7694 IRTemp res = newTempV128();
7695 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7696 return res;
7697 }
7698 default:
7699 vassert(0);
7700 }
7701 }
7702
7703
7704 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7705 only. */
7706 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7707 IRTemp oor_values )
7708 {
7709 vassert(len >= 0 && len <= 3);
7710
7711 /* Generate some useful constants as concisely as possible. */
7712 IRTemp half15 = newTemp(Ity_I64);
7713 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7714 IRTemp half16 = newTemp(Ity_I64);
7715 assign(half16, mkU64(0x1010101010101010ULL));
7716
7717 /* A zero vector */
7718 IRTemp allZero = newTempV128();
7719 assign(allZero, mkV128(0x0000));
7720 /* A vector containing 15 in each 8-bit lane */
7721 IRTemp all15 = newTempV128();
7722 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7723 /* A vector containing 16 in each 8-bit lane */
7724 IRTemp all16 = newTempV128();
7725 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7726 /* A vector containing 32 in each 8-bit lane */
7727 IRTemp all32 = newTempV128();
7728 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7729 /* A vector containing 48 in each 8-bit lane */
7730 IRTemp all48 = newTempV128();
7731 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7732 /* A vector containing 64 in each 8-bit lane */
7733 IRTemp all64 = newTempV128();
7734 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7735
7736 /* Group the 16/32/48/64 vectors so as to be indexable. */
7737 IRTemp allXX[4] = { all16, all32, all48, all64 };
7738
7739 /* Compute the result for each table vector, with zeroes in places
7740 where the index values are out of range, and OR them into the
7741 running vector. */
7742 IRTemp running_result = newTempV128();
7743 assign(running_result, mkV128(0));
7744
7745 UInt tabent;
7746 for (tabent = 0; tabent <= len; tabent++) {
7747 vassert(tabent >= 0 && tabent < 4);
7748 IRTemp bias = newTempV128();
7749 assign(bias,
7750 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
7751 IRTemp biased_indices = newTempV128();
7752 assign(biased_indices,
7753 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
7754 IRTemp valid_mask = newTempV128();
7755 assign(valid_mask,
7756 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
7757 IRTemp safe_biased_indices = newTempV128();
7758 assign(safe_biased_indices,
7759 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
7760 IRTemp results_or_junk = newTempV128();
7761 assign(results_or_junk,
7762 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7763 mkexpr(safe_biased_indices)));
7764 IRTemp results_or_zero = newTempV128();
7765 assign(results_or_zero,
7766 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7767 /* And OR that into the running result. */
7768 IRTemp tmp = newTempV128();
7769 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7770 mkexpr(running_result)));
7771 running_result = tmp;
7772 }
7773
7774 /* So now running_result holds the overall result where the indices
7775 are in range, and zero in out-of-range lanes. Now we need to
7776 compute an overall validity mask and use this to copy in the
7777 lanes in the oor_values for out of range indices. This is
7778 unnecessary for TBL but will get folded out by iropt, so we lean
7779 on that and generate the same code for TBL and TBX here. */
7780 IRTemp overall_valid_mask = newTempV128();
7781 assign(overall_valid_mask,
7782 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
7783 IRTemp result = newTempV128();
7784 assign(result,
7785 binop(Iop_OrV128,
7786 mkexpr(running_result),
7787 binop(Iop_AndV128,
7788 mkexpr(oor_values),
7789 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7790 return result;
7791 }
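/* Illustratively, with len == 0 (one table register): source index
bytes 0..15 select the corresponding byte of tab[0], while indices
16..255 leave zero in running_result and are instead taken from
|oor_values| by the final OR -- presumably a zero vector for TBL
and the old destination for TBX, as supplied by the caller. */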
7792
7793
7794 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7795 an op which takes two I64s and produces a V128. That is, a widening
7796 operator. Generate IR which applies |opI64x2toV128| to either the
7797 lower (if |is2| is False) or upper (if |is2| is True) halves of
7798 |argL| and |argR|, and return the value in a new IRTemp.
7799 */
7800 static
7801 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7802 IRExpr* argL, IRExpr* argR )
7803 {
7804 IRTemp res = newTempV128();
7805 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7806 assign(res, binop(opI64x2toV128, unop(slice, argL),
7807 unop(slice, argR)));
7808 return res;
7809 }
7810
7811
7812 /* Generate signed/unsigned absolute difference vector IR. */
7813 static
7814 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7815 {
7816 vassert(size <= 3);
7817 IRTemp argL = newTempV128();
7818 IRTemp argR = newTempV128();
7819 IRTemp msk = newTempV128();
7820 IRTemp res = newTempV128();
7821 assign(argL, argLE);
7822 assign(argR, argRE);
7823 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
7824 mkexpr(argL), mkexpr(argR)));
7825 assign(res,
7826 binop(Iop_OrV128,
7827 binop(Iop_AndV128,
7828 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
7829 mkexpr(msk)),
7830 binop(Iop_AndV128,
7831 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
7832 unop(Iop_NotV128, mkexpr(msk)))));
7833 return res;
7834 }
7835
7836
7837 /* Generate IR that takes a V128 and sign- or zero-widens
7838 either the lower or upper set of lanes to twice-as-wide,
7839 resulting in a new V128 value. */
7840 static
7841 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7842 UInt sizeNarrow, IRExpr* srcE )
7843 {
7844 IRTemp src = newTempV128();
7845 IRTemp res = newTempV128();
7846 assign(src, srcE);
7847 switch (sizeNarrow) {
7848 case X10:
7849 assign(res,
7850 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7851 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7852 : Iop_InterleaveLO32x4,
7853 mkexpr(src),
7854 mkexpr(src)),
7855 mkU8(32)));
7856 break;
7857 case X01:
7858 assign(res,
7859 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7860 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7861 : Iop_InterleaveLO16x8,
7862 mkexpr(src),
7863 mkexpr(src)),
7864 mkU8(16)));
7865 break;
7866 case X00:
7867 assign(res,
7868 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7869 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7870 : Iop_InterleaveLO8x16,
7871 mkexpr(src),
7872 mkexpr(src)),
7873 mkU8(8)));
7874 break;
7875 default:
7876 vassert(0);
7877 }
7878 return res;
7879 }
7880
7881
7882 /* Generate IR that takes a V128 and sign- or zero-widens
7883 either the even or odd lanes to twice-as-wide,
7884 resulting in a new V128 value. */
7885 static
7886 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7887 UInt sizeNarrow, IRExpr* srcE )
7888 {
7889 IRTemp src = newTempV128();
7890 IRTemp res = newTempV128();
7891 IROp opSAR = mkVecSARN(sizeNarrow+1);
7892 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7893 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7894 IROp opSxR = zWiden ? opSHR : opSAR;
7895 UInt amt = 0;
7896 switch (sizeNarrow) {
7897 case X10: amt = 32; break;
7898 case X01: amt = 16; break;
7899 case X00: amt = 8; break;
7900 default: vassert(0);
7901 }
7902 assign(src, srcE);
7903 if (fromOdd) {
7904 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7905 } else {
7906 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7907 mkU8(amt)));
7908 }
7909 return res;
7910 }
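/* E.g. for sizeNarrow == X00 with fromOdd == False: each 16-bit
container is shifted left by 8 and then back right by 8 (logically
or arithmetically), leaving the even-numbered 8-bit lanes widened
to 16 bits. */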
7911
7912
7913 /* Generate IR that takes two V128s and narrows (takes lower half)
7914 of each lane, producing a single V128 value. */
7915 static
7916 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7917 {
7918 IRTemp res = newTempV128();
7919 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7920 mkexpr(argHi), mkexpr(argLo)));
7921 return res;
7922 }
7923
7924
7925 /* Return a temp which holds the vector dup of the lane of width
7926 (1 << size) obtained from src[laneNo]. */
7927 static
7928 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7929 {
7930 vassert(size <= 3);
7931 /* Normalise |laneNo| so it is of the form
7932 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7933 This puts the bits we want to inspect at constant offsets
7934 regardless of the value of |size|.
7935 */
7936 UInt ix = laneNo << size;
7937 vassert(ix <= 15);
7938 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7939 switch (size) {
7940 case 0: /* B */
7941 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7942 /* fallthrough */
7943 case 1: /* H */
7944 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7945 /* fallthrough */
7946 case 2: /* S */
7947 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7948 /* fallthrough */
7949 case 3: /* D */
7950 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7951 break;
7952 default:
7953 vassert(0);
7954 }
7955 IRTemp res = newTempV128();
7956 assign(res, src);
7957 Int i;
7958 for (i = 3; i >= 0; i--) {
7959 if (ops[i] == Iop_INVALID)
7960 break;
7961 IRTemp tmp = newTempV128();
7962 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7963 res = tmp;
7964 }
7965 return res;
7966 }
7967
7968
7969 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7970 selector encoded as shown below. Return a new V128 holding the
7971 selected lane from |srcV| dup'd out to V128, and also return the
7972 lane number, log2 of the lane size in bytes, and width-character via
7973 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7974 is an invalid selector, in which case return
7975 IRTemp_INVALID, 0, 0 and '?' respectively.
7976
7977 imm5 = xxxx1 signifies .b[xxxx]
7978 = xxx10 .h[xxx]
7979 = xx100 .s[xx]
7980 = x1000 .d[x]
7981 otherwise invalid
7982 */
7983 static
7984 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7985 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7986 IRExpr* srcV, UInt imm5 )
7987 {
7988 *laneNo = 0;
7989 *laneSzLg2 = 0;
7990 *laneCh = '?';
7991
7992 if (imm5 & 1) {
7993 *laneNo = (imm5 >> 1) & 15;
7994 *laneSzLg2 = 0;
7995 *laneCh = 'b';
7996 }
7997 else if (imm5 & 2) {
7998 *laneNo = (imm5 >> 2) & 7;
7999 *laneSzLg2 = 1;
8000 *laneCh = 'h';
8001 }
8002 else if (imm5 & 4) {
8003 *laneNo = (imm5 >> 3) & 3;
8004 *laneSzLg2 = 2;
8005 *laneCh = 's';
8006 }
8007 else if (imm5 & 8) {
8008 *laneNo = (imm5 >> 4) & 1;
8009 *laneSzLg2 = 3;
8010 *laneCh = 'd';
8011 }
8012 else {
8013 /* invalid */
8014 return IRTemp_INVALID;
8015 }
8016
8017 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
8018 }
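/* E.g. imm5 == 0b00110 selects .h[1] (laneNo 1, laneSzLg2 1, 'h')
and imm5 == 0b01000 selects .d[0]; imm5 == 0 is the invalid case. */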
8019
8020
8021 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
8022 static
8023 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
8024 {
8025 IRType ty = Ity_INVALID;
8026 IRTemp rcS = IRTemp_INVALID;
8027 switch (size) {
8028 case X01:
8029 vassert(imm <= 0xFFFFULL);
8030 ty = Ity_I16;
8031 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
8032 break;
8033 case X10:
8034 vassert(imm <= 0xFFFFFFFFULL);
8035 ty = Ity_I32;
8036 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
8037 break;
8038 case X11:
8039 ty = Ity_I64;
8040 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
8041 default:
8042 vassert(0);
8043 }
8044 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
8045 return rcV;
8046 }
8047
8048
8049 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
8050 and the upper can contain any value -- it is ignored. If |is2| is False,
8051 generate IR to put |new64| in the lower half of vector reg |dd| and zero
8052 the upper half. If |is2| is True, generate IR to put |new64| in the upper
8053 half of vector reg |dd| and leave the lower half unchanged. This
8054 simulates the behaviour of the "foo/foo2" instructions in which the
8055 destination is half the width of sources, for example addhn/addhn2.
8056 */
8057 static
8058 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
8059 {
8060 if (is2) {
8061 /* Get the old contents of Vdd, zero the upper half, and OR in
8062 |new64| as the new upper half. */
8063 IRTemp t_zero_oldLO = newTempV128();
8064 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
8065 IRTemp t_newHI_zero = newTempV128();
8066 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
8067 mkV128(0x0000)));
8068 IRTemp res = newTempV128();
8069 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
8070 mkexpr(t_newHI_zero)));
8071 putQReg128(dd, mkexpr(res));
8072 } else {
8073 /* This is simple. */
8074 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
8075 }
8076 }
8077
8078
8079 /* Compute vector SQABS at lane size |size| for |srcE|, returning
8080 the q result in |*qabs| and the normal result in |*nabs|. */
8081 static
8082 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
8083 IRExpr* srcE, UInt size )
8084 {
8085 IRTemp src, mask, maskn, nsub, qsub;
8086 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
8087 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
8088 assign(src, srcE);
8089 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
8090 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
8091 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8092 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8093 assign(*nabs, binop(Iop_OrV128,
8094 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
8095 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8096 assign(*qabs, binop(Iop_OrV128,
8097 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
8098 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
8099 }
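/* For 32-bit lanes, for example, an input lane of 0x80000000 produces
0x80000000 in *nabs (plain negation wraps) but 0x7FFFFFFF in *qabs
(the saturating subtraction), so a caller can detect saturation by
comparing the two. */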
8100
8101
8102 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
8103 the q result in |*qneg| and the normal result in |*nneg|. */
8104 static
8105 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
8106 IRExpr* srcE, UInt size )
8107 {
8108 IRTemp src = IRTemp_INVALID;
8109 newTempsV128_3(&src, nneg, qneg);
8110 assign(src, srcE);
8111 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
8112 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
8113 }
8114
8115
8116 /* Zero all except the least significant lane of |srcE|, where |size|
8117 indicates the lane size in the usual way. */
8118 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
8119 {
8120 vassert(size < 4);
8121 IRTemp t = newTempV128();
8122 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
8123 return t;
8124 }
8125
8126
8127 /* Generate IR to compute vector widening MULL from either the lower
8128 (is2==False) or upper (is2==True) halves of vecN and vecM. The
8129 widening multiplies are unsigned when isU==True and signed when
8130 isU==False. |size| is the narrow lane size indication. Optionally,
8131 the product may be added to or subtracted from vecD, at the wide lane
8132 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
8133 is 'm' (only multiply) then the accumulate part does not happen, and
8134 |vecD| is expected to == IRTemp_INVALID.
8135
8136 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
8137 are allowed. The result is returned in a new IRTemp, which is
8138 returned in *res. */
8139 static
8140 void math_MULL_ACC ( /*OUT*/IRTemp* res,
8141 Bool is2, Bool isU, UInt size, HChar mas,
8142 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8143 {
8144 vassert(res && *res == IRTemp_INVALID);
8145 vassert(size <= 2);
8146 vassert(mas == 'm' || mas == 'a' || mas == 's');
8147 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
8148 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
8149 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
8150 : (mas == 's' ? mkVecSUB(size+1)
8151 : Iop_INVALID);
8152 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
8153 mkexpr(vecN), mkexpr(vecM));
8154 *res = newTempV128();
8155 assign(*res, mas == 'm' ? mkexpr(mul)
8156 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
8157 }
8158
8159
8160 /* Same as math_MULL_ACC, except the multiply is signed widening,
8161 the multiplied value is then doubled, before being added to or
8162 subtracted from the accumulated value. And everything is
8163 saturated. In all cases, saturation residuals are returned
8164 via (sat1q, sat1n), and in the accumulate cases,
8165 via (sat2q, sat2n) too. All results are returned in new temporaries.
8166 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8167 so the caller can tell this has happened. */
8168 static
8169 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8170 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8171 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8172 Bool is2, UInt size, HChar mas,
8173 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8174 {
8175 vassert(size <= 2);
8176 vassert(mas == 'm' || mas == 'a' || mas == 's');
8177 /* Compute
8178 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
8179 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
8180 IOW take either the low or high halves of vecN and vecM, signed widen,
8181 multiply, double that, and signedly saturate. Also compute the same
8182 but without saturation.
8183 */
8184 vassert(sat2q && *sat2q == IRTemp_INVALID);
8185 vassert(sat2n && *sat2n == IRTemp_INVALID);
8186 newTempsV128_3(sat1q, sat1n, res);
8187 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8188 mkexpr(vecN), mkexpr(vecM));
8189 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8190 mkexpr(vecN), mkexpr(vecM));
8191 assign(*sat1q, mkexpr(tq));
8192 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8193
8194 /* If there is no accumulation, the final result is sat1q,
8195 and there's no assignment to sat2q or sat2n. */
8196 if (mas == 'm') {
8197 assign(*res, mkexpr(*sat1q));
8198 return;
8199 }
8200
8201 /* Compute
8202 sat2q = vecD +sq/-sq sat1q
8203 sat2n = vecD +/- sat1n
8204 result = sat2q
8205 */
8206 newTempsV128_2(sat2q, sat2n);
8207 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8208 mkexpr(vecD), mkexpr(*sat1q)));
8209 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8210 mkexpr(vecD), mkexpr(*sat1n)));
8211 assign(*res, mkexpr(*sat2q));
8212 }
8213
8214
8215 /* Generate IR for widening signed vector multiplies. The operands
8216 have their lane width signedly widened, and they are then multiplied
8217 at the wider width, returning results in two new IRTemps. */
8218 static
8219 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8220 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8221 {
8222 vassert(sizeNarrow <= 2);
8223 newTempsV128_2(resHI, resLO);
8224 IRTemp argLhi = newTemp(Ity_I64);
8225 IRTemp argLlo = newTemp(Ity_I64);
8226 IRTemp argRhi = newTemp(Ity_I64);
8227 IRTemp argRlo = newTemp(Ity_I64);
8228 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8229 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8230 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8231 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8232 IROp opMulls = mkVecMULLS(sizeNarrow);
8233 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8234 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8235 }
8236
8237
8238 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8239 double that, possibly add a rounding constant (R variants), and take
8240 the high half. */
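/* Worked example (informal): for H lanes with vN.h == 0x0001 and
   vM.h == 0x4000, the doubled product is 0x00008000.  SQDMULH takes the
   high half and returns 0x0000, whereas SQRDMULH first adds the rounding
   constant 0x8000 and so returns 0x0001. */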
8241 static
8242 void math_SQDMULH ( /*OUT*/IRTemp* res,
8243 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8244 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8245 {
8246 vassert(size == X01 || size == X10); /* s or h only */
8247
8248 newTempsV128_3(res, sat1q, sat1n);
8249
8250 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8251 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8252
8253 IROp addWide = mkVecADD(size+1);
8254
8255 if (isR) {
8256 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8257
8258 Int rcShift = size == X01 ? 15 : 31;
8259 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8260 assign(*sat1n,
8261 binop(mkVecCATODDLANES(size),
8262 binop(addWide,
8263 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8264 mkexpr(roundConst)),
8265 binop(addWide,
8266 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8267 mkexpr(roundConst))));
8268 } else {
8269 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8270
8271 assign(*sat1n,
8272 binop(mkVecCATODDLANES(size),
8273 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8274 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8275 }
8276
8277 assign(*res, mkexpr(*sat1q));
8278 }
8279
8280
8281 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8282 a new temp in *res, and the Q difference pair in new temps in
8283 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8284 three operations it is. */
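/* Worked example (informal): UQSHL with 8-bit lanes and shift == 3 on a
   source byte 0x2F.  The true result 0x178 does not fit in 8 bits, and
   the scheme below detects this because qDiff1 = 0x2F >> 5 = 0x01 is
   nonzero while qDiff2 is zero, so the two differ and QCFLAG gets set. */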
8285 static
8286 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8287 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8288 IRTemp src, UInt size, UInt shift, const HChar* nm )
8289 {
8290 vassert(size <= 3);
8291 UInt laneBits = 8 << size;
8292 vassert(shift < laneBits);
8293 newTempsV128_3(res, qDiff1, qDiff2);
8294 IRTemp z128 = newTempV128();
8295 assign(z128, mkV128(0x0000));
8296
8297 /* UQSHL */
8298 if (vex_streq(nm, "uqshl")) {
8299 IROp qop = mkVecQSHLNSATUU(size);
8300 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8301 if (shift == 0) {
8302 /* No shift means no saturation. */
8303 assign(*qDiff1, mkexpr(z128));
8304 assign(*qDiff2, mkexpr(z128));
8305 } else {
8306 /* Saturation has occurred if any of the shifted-out bits are
8307 nonzero. We get the shifted-out bits by right-shifting the
8308 original value. */
8309 UInt rshift = laneBits - shift;
8310 vassert(rshift >= 1 && rshift < laneBits);
8311 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8312 assign(*qDiff2, mkexpr(z128));
8313 }
8314 return;
8315 }
8316
8317 /* SQSHL */
8318 if (vex_streq(nm, "sqshl")) {
8319 IROp qop = mkVecQSHLNSATSS(size);
8320 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8321 if (shift == 0) {
8322 /* No shift means no saturation. */
8323 assign(*qDiff1, mkexpr(z128));
8324 assign(*qDiff2, mkexpr(z128));
8325 } else {
8326 /* Saturation has occurred if any of the shifted-out bits are
8327 different from the top bit of the original value. */
8328 UInt rshift = laneBits - 1 - shift;
8329 vassert(rshift >= 0 && rshift < laneBits-1);
8330 /* qDiff1 is the shifted out bits, and the top bit of the original
8331 value, preceded by zeroes. */
8332 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8333 /* qDiff2 is the top bit of the original value, cloned the
8334 correct number of times. */
8335 assign(*qDiff2, binop(mkVecSHRN(size),
8336 binop(mkVecSARN(size), mkexpr(src),
8337 mkU8(laneBits-1)),
8338 mkU8(rshift)));
8339 /* This also succeeds in comparing the top bit of the original
8340 value to itself, which is a bit stupid, but not wrong. */
8341 }
8342 return;
8343 }
8344
8345 /* SQSHLU */
8346 if (vex_streq(nm, "sqshlu")) {
8347 IROp qop = mkVecQSHLNSATSU(size);
8348 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8349 if (shift == 0) {
8350 /* If there's no shift, saturation depends on the top bit
8351 of the source. */
8352 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8353 assign(*qDiff2, mkexpr(z128));
8354 } else {
8355 /* Saturation has occurred if any of the shifted-out bits are
8356 nonzero. We get the shifted-out bits by right-shifting the
8357 original value. */
8358 UInt rshift = laneBits - shift;
8359 vassert(rshift >= 1 && rshift < laneBits);
8360 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8361 assign(*qDiff2, mkexpr(z128));
8362 }
8363 return;
8364 }
8365
8366 vassert(0);
8367 }
8368
8369
8370 /* Generate IR to do SRHADD and URHADD. */
8371 static
8372 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8373 {
8374 /* Generate this:
8375 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8376 */
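/* Worked example (informal): for unsigned 8-bit lanes with A == 0xFF and
   B == 0x01, this gives 0x7F + 0x00 + ((1 + 1 + 1) >> 1) == 0x80, which
   is the correctly rounded (A + B + 1) >> 1, computed without needing a
   9-bit intermediate. */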
8377 vassert(size <= 3);
8378 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8379 IROp opADD = mkVecADD(size);
8380 /* The only tricky bit is to generate the correct vector 1 constant. */
8381 const ULong ones64[4]
8382 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8383 0x0000000100000001ULL, 0x0000000000000001ULL };
8384 IRTemp imm64 = newTemp(Ity_I64);
8385 assign(imm64, mkU64(ones64[size]));
8386 IRTemp vecOne = newTempV128();
8387 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8388 IRTemp scaOne = newTemp(Ity_I8);
8389 assign(scaOne, mkU8(1));
8390 IRTemp res = newTempV128();
8391 assign(res,
8392 binop(opADD,
8393 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8394 binop(opADD,
8395 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8396 binop(opSHR,
8397 binop(opADD,
8398 binop(opADD,
8399 binop(Iop_AndV128, mkexpr(aa),
8400 mkexpr(vecOne)),
8401 binop(Iop_AndV128, mkexpr(bb),
8402 mkexpr(vecOne))
8403 ),
8404 mkexpr(vecOne)
8405 ),
8406 mkexpr(scaOne)
8407 )
8408 )
8409 )
8410 );
8411 return res;
8412 }
8413
8414
8415 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8416 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8417 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8418 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8419 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8420 unmodified. The presence of |opZHI| means this function can be used to
8421 generate QCFLAG update code for both scalar and vector SIMD operations.
8422 */
8423 static
8424 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8425 {
8426 IRTemp diff = newTempV128();
8427 IRTemp oldQCFLAG = newTempV128();
8428 IRTemp newQCFLAG = newTempV128();
8429 if (opZHI == Iop_INVALID) {
8430 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8431 } else {
8432 vassert(opZHI == Iop_ZeroHI64ofV128
8433 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8434 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8435 }
8436 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8437 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8438 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8439 }
8440
8441
8442 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8443 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8444 operations. */
8445 static
8446 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8447 {
8448 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8449 }
8450
8451
8452 /* Generate IR to rearrange two vector values in a way which is useful
8453 for doing S/D add-pair etc operations. There are 3 cases:
8454
8455 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8456
8457 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8458
8459 2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8460
8461 The cases are distinguished as follows:
8462 isD == True, bitQ == 1 => 2d
8463 isD == False, bitQ == 1 => 4s
8464 isD == False, bitQ == 0 => 2s
8465 */
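/* Worked example (informal): in the 4s case a subsequent lanewise FADD of
   rearrL and rearrR yields [m3+m2, m1+m0, n3+n2, n1+n0], which is exactly
   the FADDP result for operands Vn and Vm. */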
8466 static
8467 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8468 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8469 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8470 )
8471 {
8472 vassert(rearrL && *rearrL == IRTemp_INVALID);
8473 vassert(rearrR && *rearrR == IRTemp_INVALID);
8474 *rearrL = newTempV128();
8475 *rearrR = newTempV128();
8476 if (isD) {
8477 // 2d case
8478 vassert(bitQ == 1);
8479 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8480 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8481 }
8482 else if (!isD && bitQ == 1) {
8483 // 4s case
8484 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8485 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8486 } else {
8487 // 2s case
8488 vassert(!isD && bitQ == 0);
8489 IRTemp m1n1m0n0 = newTempV128();
8490 IRTemp m0n0m1n1 = newTempV128();
8491 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8492 mkexpr(vecM), mkexpr(vecN)));
8493 assign(m0n0m1n1, triop(Iop_SliceV128,
8494 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8495 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8496 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8497 }
8498 }
8499
8500
8501 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8502 static Double two_to_the_minus ( Int n )
8503 {
8504 if (n == 1) return 0.5;
8505 vassert(n >= 2 && n <= 64);
8506 Int half = n / 2;
8507 return two_to_the_minus(half) * two_to_the_minus(n - half);
8508 }
8509
8510
8511 /* Returns 2.0 ^ n for n in 1 .. 64 */
8512 static Double two_to_the_plus ( Int n )
8513 {
8514 if (n == 1) return 2.0;
8515 vassert(n >= 2 && n <= 64);
8516 Int half = n / 2;
8517 return two_to_the_plus(half) * two_to_the_plus(n - half);
8518 }
8519
8520
8521 /*------------------------------------------------------------*/
8522 /*--- SIMD and FP instructions ---*/
8523 /*------------------------------------------------------------*/
8524
8525 static
8526 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8527 {
8528 /* 31 29 23 21 20 15 14 10 9 4
8529 0 q 101110 op2 0 m 0 imm4 0 n d
8530 Decode fields: op2
8531 */
8532 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8533 if (INSN(31,31) != 0
8534 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8535 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8536 return False;
8537 }
8538 UInt bitQ = INSN(30,30);
8539 UInt op2 = INSN(23,22);
8540 UInt mm = INSN(20,16);
8541 UInt imm4 = INSN(14,11);
8542 UInt nn = INSN(9,5);
8543 UInt dd = INSN(4,0);
8544
8545 if (op2 == BITS2(0,0)) {
8546 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
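/* Worked example (informal): ext v0.16b, v1.16b, v2.16b, #3 takes bytes
   3..18 of the 32-byte concatenation v2:v1, so v0.b[i] = v1.b[i+3] for
   i = 0..12 and v0.b[13..15] = v2.b[0..2].  Iop_SliceV128(hi, lo, n)
   below expresses this "16 bytes at byte offset n of hi:lo" selection. */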
8547 IRTemp sHi = newTempV128();
8548 IRTemp sLo = newTempV128();
8549 IRTemp res = newTempV128();
8550 assign(sHi, getQReg128(mm));
8551 assign(sLo, getQReg128(nn));
8552 if (bitQ == 1) {
8553 if (imm4 == 0) {
8554 assign(res, mkexpr(sLo));
8555 } else {
8556 vassert(imm4 >= 1 && imm4 <= 15);
8557 assign(res, triop(Iop_SliceV128,
8558 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8559 }
8560 putQReg128(dd, mkexpr(res));
8561 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8562 } else {
8563 if (imm4 >= 8) return False;
8564 if (imm4 == 0) {
8565 assign(res, mkexpr(sLo));
8566 } else {
8567 vassert(imm4 >= 1 && imm4 <= 7);
8568 IRTemp hi64lo64 = newTempV128();
8569 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8570 mkexpr(sHi), mkexpr(sLo)));
8571 assign(res, triop(Iop_SliceV128,
8572 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8573 }
8574 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8575 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8576 }
8577 return True;
8578 }
8579
8580 return False;
8581 # undef INSN
8582 }
8583
8584
8585 static
8586 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8587 {
8588 /* 31 29 23 21 20 15 14 12 11 9 4
8589 0 q 001110 op2 0 m 0 len op 00 n d
8590 Decode fields: op2,len,op
8591 */
8592 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8593 if (INSN(31,31) != 0
8594 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8595 || INSN(21,21) != 0
8596 || INSN(15,15) != 0
8597 || INSN(11,10) != BITS2(0,0)) {
8598 return False;
8599 }
8600 UInt bitQ = INSN(30,30);
8601 UInt op2 = INSN(23,22);
8602 UInt mm = INSN(20,16);
8603 UInt len = INSN(14,13);
8604 UInt bitOP = INSN(12,12);
8605 UInt nn = INSN(9,5);
8606 UInt dd = INSN(4,0);
8607
8608 if (op2 == X00) {
8609 /* -------- 00,xx,0 TBL, xx register table -------- */
8610 /* -------- 00,xx,1 TBX, xx register table -------- */
8611 /* 31 28 20 15 14 12 9 4
8612 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8613 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8614 where Ta = 16b(q=1) or 8b(q=0)
8615 */
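/* Worked example (informal): with len == 1 the table is the 32 bytes of
   {Vn, V(n+1)}.  Each byte of Vm is an index: Vm.b[i] == 17 selects byte
   1 of V(n+1), while Vm.b[i] >= 32 is out of range and produces 0 for
   TBL, or leaves Vd.b[i] unchanged for TBX. */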
8616 Bool isTBX = bitOP == 1;
8617 /* The out-of-range values to use. */
8618 IRTemp oor_values = newTempV128();
8619 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8620 /* src value */
8621 IRTemp src = newTempV128();
8622 assign(src, getQReg128(mm));
8623 /* The table values */
8624 IRTemp tab[4];
8625 UInt i;
8626 for (i = 0; i <= len; i++) {
8627 vassert(i < 4);
8628 tab[i] = newTempV128();
8629 assign(tab[i], getQReg128((nn + i) % 32));
8630 }
8631 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8632 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8633 const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8634 const HChar* nm = isTBX ? "tbx" : "tbl";
8635 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
8636 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8637 return True;
8638 }
8639
8640 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8641 return False;
8642 # undef INSN
8643 }
8644
8645
8646 static
8647 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8648 {
8649 /* 31 29 23 21 20 15 14 11 9 4
8650 0 q 001110 size 0 m 0 opcode 10 n d
8651 Decode fields: opcode
8652 */
8653 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8654 if (INSN(31,31) != 0
8655 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8656 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8657 return False;
8658 }
8659 UInt bitQ = INSN(30,30);
8660 UInt size = INSN(23,22);
8661 UInt mm = INSN(20,16);
8662 UInt opcode = INSN(14,12);
8663 UInt nn = INSN(9,5);
8664 UInt dd = INSN(4,0);
8665
8666 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8667 /* -------- 001 UZP1 std7_std7_std7 -------- */
8668 /* -------- 101 UZP2 std7_std7_std7 -------- */
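/* Worked example (informal): uzp1 Vd.4s, Vn.4s, Vm.4s keeps the
   even-numbered lanes of the Vm:Vn pair, i.e. [m2 m0 n2 n0], which is
   what CatEvenLanes32x4(Vm, Vn) computes.  For the Q == 0 forms the two
   64-bit halves are first packed into a single value so the same lane
   selection applies. */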
8669 if (bitQ == 0 && size == X11) return False; // implied 1d case
8670 Bool isUZP1 = opcode == BITS3(0,0,1);
8671 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8672 : mkVecCATODDLANES(size);
8673 IRTemp preL = newTempV128();
8674 IRTemp preR = newTempV128();
8675 IRTemp res = newTempV128();
8676 if (bitQ == 0) {
8677 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8678 getQReg128(nn)));
8679 assign(preR, mkexpr(preL));
8680 } else {
8681 assign(preL, getQReg128(mm));
8682 assign(preR, getQReg128(nn));
8683 }
8684 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8685 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8686 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8687 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8688 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8689 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8690 return True;
8691 }
8692
8693 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8694 /* -------- 010 TRN1 std7_std7_std7 -------- */
8695 /* -------- 110 TRN2 std7_std7_std7 -------- */
8696 if (bitQ == 0 && size == X11) return False; // implied 1d case
8697 Bool isTRN1 = opcode == BITS3(0,1,0);
8698 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8699 : mkVecCATODDLANES(size);
8700 IROp op2 = mkVecINTERLEAVEHI(size);
8701 IRTemp srcM = newTempV128();
8702 IRTemp srcN = newTempV128();
8703 IRTemp res = newTempV128();
8704 assign(srcM, getQReg128(mm));
8705 assign(srcN, getQReg128(nn));
8706 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8707 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8708 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8709 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8710 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8711 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8712 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8713 return True;
8714 }
8715
8716 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8717 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8718 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8719 if (bitQ == 0 && size == X11) return False; // implied 1d case
8720 Bool isZIP1 = opcode == BITS3(0,1,1);
8721 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8722 : mkVecINTERLEAVEHI(size);
8723 IRTemp preL = newTempV128();
8724 IRTemp preR = newTempV128();
8725 IRTemp res = newTempV128();
8726 if (bitQ == 0 && !isZIP1) {
8727 IRTemp z128 = newTempV128();
8728 assign(z128, mkV128(0x0000));
8729 // preL = Vm shifted left 32 bits
8730 // preR = Vn shifted left 32 bits
8731 assign(preL, triop(Iop_SliceV128,
8732 getQReg128(mm), mkexpr(z128), mkU8(12)));
8733 assign(preR, triop(Iop_SliceV128,
8734 getQReg128(nn), mkexpr(z128), mkU8(12)));
8735
8736 } else {
8737 assign(preL, getQReg128(mm));
8738 assign(preR, getQReg128(nn));
8739 }
8740 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8741 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8742 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8743 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8744 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8745 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8746 return True;
8747 }
8748
8749 return False;
8750 # undef INSN
8751 }
8752
8753
8754 static
8755 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8756 {
8757 /* 31 28 23 21 16 11 9 4
8758 0 q u 01110 size 11000 opcode 10 n d
8759 Decode fields: u,size,opcode
8760 */
8761 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8762 if (INSN(31,31) != 0
8763 || INSN(28,24) != BITS5(0,1,1,1,0)
8764 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8765 return False;
8766 }
8767 UInt bitQ = INSN(30,30);
8768 UInt bitU = INSN(29,29);
8769 UInt size = INSN(23,22);
8770 UInt opcode = INSN(16,12);
8771 UInt nn = INSN(9,5);
8772 UInt dd = INSN(4,0);
8773
8774 if (opcode == BITS5(0,0,0,1,1)) {
8775 /* -------- 0,xx,00011 SADDLV -------- */
8776 /* -------- 1,xx,00011 UADDLV -------- */
8777 /* size is the narrow size */
8778 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8779 Bool isU = bitU == 1;
8780 IRTemp src = newTempV128();
8781 assign(src, getQReg128(nn));
8782 /* The basic plan is to widen the lower half, and if Q = 1,
8783 the upper half too. Add them together (if Q = 1), and in
8784 either case fold with add at twice the lane width.
8785 */
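/* Worked example (informal): uaddlv h0, v1.8b (Q == 0) zero-widens the
   eight source bytes to eight H lanes and then folds them with 16-bit
   adds; for v1.8b == {1,2,3,4,5,6,7,8} the result lane is 36. */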
8786 IRExpr* widened
8787 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8788 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8789 if (bitQ == 1) {
8790 widened
8791 = binop(mkVecADD(size+1),
8792 widened,
8793 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8794 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8795 );
8796 }
8797 /* Now fold. */
8798 IRTemp tWi = newTempV128();
8799 assign(tWi, widened);
8800 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8801 putQReg128(dd, mkexpr(res));
8802 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8803 const HChar ch = "hsd"[size];
8804 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8805 nameQReg128(dd), ch, nameQReg128(nn), arr);
8806 return True;
8807 }
8808
8809 UInt ix = 0;
8810 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8811 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8812 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8813 /**/
8814 if (ix != 0) {
8815 /* -------- 0,xx,01010: SMAXV -------- (1) */
8816 /* -------- 1,xx,01010: UMAXV -------- (2) */
8817 /* -------- 0,xx,11010: SMINV -------- (3) */
8818 /* -------- 1,xx,11010: UMINV -------- (4) */
8819 /* -------- 0,xx,11011: ADDV -------- (5) */
8820 vassert(ix >= 1 && ix <= 5);
8821 if (size == X11) return False; // 1d,2d cases not allowed
8822 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
8823 const IROp opMAXS[3]
8824 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8825 const IROp opMAXU[3]
8826 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
8827 const IROp opMINS[3]
8828 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8829 const IROp opMINU[3]
8830 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8831 const IROp opADD[3]
8832 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
8833 vassert(size < 3);
8834 IROp op = Iop_INVALID;
8835 const HChar* nm = NULL;
8836 switch (ix) {
8837 case 1: op = opMAXS[size]; nm = "smaxv"; break;
8838 case 2: op = opMAXU[size]; nm = "umaxv"; break;
8839 case 3: op = opMINS[size]; nm = "sminv"; break;
8840 case 4: op = opMINU[size]; nm = "uminv"; break;
8841 case 5: op = opADD[size]; nm = "addv"; break;
8842 default: vassert(0);
8843 }
8844 vassert(op != Iop_INVALID && nm != NULL);
8845 IRTemp tN1 = newTempV128();
8846 assign(tN1, getQReg128(nn));
8847 /* If Q == 0, we're just folding lanes in the lower half of
8848 the value. In which case, copy the lower half of the
8849 source into the upper half, so we can then treat it the
8850 same as the full width case. Except for the addition case,
8851 in which we have to zero out the upper half. */
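/* Worked example (informal): for smaxv b0, v1.8b the duplication makes
   the 16-lane fold see each real lane twice, which leaves the maximum
   unchanged; for addv that duplication would double the sum, hence the
   upper half is zeroed instead, since adding zero lanes is harmless. */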
8852 IRTemp tN2 = newTempV128();
8853 assign(tN2, bitQ == 0
8854 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8855 : mk_CatEvenLanes64x2(tN1,tN1))
8856 : mkexpr(tN1));
8857 IRTemp res = math_FOLDV(tN2, op);
8858 if (res == IRTemp_INVALID)
8859 return False; /* means math_FOLDV
8860 doesn't handle this case yet */
8861 putQReg128(dd, mkexpr(res));
8862 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8863 IRType laneTy = tys[size];
8864 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8865 DIP("%s %s, %s.%s\n", nm,
8866 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8867 return True;
8868 }
8869
8870 if ((size == X00 || size == X10)
8871 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8872 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8873 /* -------- 0,10,01100: FMINNMV s_4s -------- */
8874 /* -------- 1,00,01111: FMAXV s_4s -------- */
8875 /* -------- 1,10,01111: FMINV s_4s -------- */
8876 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8877 if (bitQ == 0) return False; // Only 4s is allowed
8878 Bool isMIN = (size & 2) == 2;
8879 Bool isNM = opcode == BITS5(0,1,1,0,0);
8880 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8881 IRTemp src = newTempV128();
8882 assign(src, getQReg128(nn));
8883 IRTemp res = math_FOLDV(src, opMXX);
8884 putQReg128(dd, mkexpr(res));
8885 DIP("%s%sv s%u, %u.4s\n",
8886 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8887 return True;
8888 }
8889
8890 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8891 return False;
8892 # undef INSN
8893 }
8894
8895
8896 static
8897 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8898 {
8899 /* 31 28 20 15 14 10 9 4
8900 0 q op 01110000 imm5 0 imm4 1 n d
8901 Decode fields: q,op,imm4
8902 */
8903 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8904 if (INSN(31,31) != 0
8905 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8906 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8907 return False;
8908 }
8909 UInt bitQ = INSN(30,30);
8910 UInt bitOP = INSN(29,29);
8911 UInt imm5 = INSN(20,16);
8912 UInt imm4 = INSN(14,11);
8913 UInt nn = INSN(9,5);
8914 UInt dd = INSN(4,0);
8915
8916 /* -------- x,0,0000: DUP (element, vector) -------- */
8917 /* 31 28 20 15 9 4
8918 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
8919 */
8920 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8921 UInt laneNo = 0;
8922 UInt laneSzLg2 = 0;
8923 HChar laneCh = '?';
8924 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8925 getQReg128(nn), imm5);
8926 if (res == IRTemp_INVALID)
8927 return False;
8928 if (bitQ == 0 && laneSzLg2 == X11)
8929 return False; /* .1d case */
8930 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8931 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8932 DIP("dup %s.%s, %s.%c[%u]\n",
8933 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8934 return True;
8935 }
8936
8937 /* -------- x,0,0001: DUP (general, vector) -------- */
8938 /* 31 28 20 15 9 4
8939 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
8940 Q=0 writes 64, Q=1 writes 128
8941 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
8942 xxx10 4H(q=0) or 8H(q=1), R=W
8943 xx100 2S(q=0) or 4S(q=1), R=W
8944 x1000 Invalid(q=0) or 2D(q=1), R=X
8945 x0000 Invalid(q=0) or Invalid(q=1)
8946 Require op=0, imm4=0001
8947 */
8948 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8949 Bool isQ = bitQ == 1;
8950 IRTemp w0 = newTemp(Ity_I64);
8951 const HChar* arT = "??";
8952 IRType laneTy = Ity_INVALID;
8953 if (imm5 & 1) {
8954 arT = isQ ? "16b" : "8b";
8955 laneTy = Ity_I8;
8956 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8957 }
8958 else if (imm5 & 2) {
8959 arT = isQ ? "8h" : "4h";
8960 laneTy = Ity_I16;
8961 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8962 }
8963 else if (imm5 & 4) {
8964 arT = isQ ? "4s" : "2s";
8965 laneTy = Ity_I32;
8966 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8967 }
8968 else if ((imm5 & 8) && isQ) {
8969 arT = "2d";
8970 laneTy = Ity_I64;
8971 assign(w0, getIReg64orZR(nn));
8972 }
8973 else {
8974 /* invalid; leave laneTy unchanged. */
8975 }
8976 /* */
8977 if (laneTy != Ity_INVALID) {
8978 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8979 putQReg128(dd, binop(Iop_64HLtoV128,
8980 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8981 DIP("dup %s.%s, %s\n",
8982 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8983 return True;
8984 }
8985 /* invalid */
8986 return False;
8987 }
8988
8989 /* -------- 1,0,0011: INS (general) -------- */
8990 /* 31 28 20 15 9 4
8991 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
8992 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8993 xxx10 -> H, xxx
8994 xx100 -> S, xx
8995 x1000 -> D, x
8996 */
8997 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8998 HChar ts = '?';
8999 UInt laneNo = 16;
9000 IRExpr* src = NULL;
9001 if (imm5 & 1) {
9002 src = unop(Iop_64to8, getIReg64orZR(nn));
9003 laneNo = (imm5 >> 1) & 15;
9004 ts = 'b';
9005 }
9006 else if (imm5 & 2) {
9007 src = unop(Iop_64to16, getIReg64orZR(nn));
9008 laneNo = (imm5 >> 2) & 7;
9009 ts = 'h';
9010 }
9011 else if (imm5 & 4) {
9012 src = unop(Iop_64to32, getIReg64orZR(nn));
9013 laneNo = (imm5 >> 3) & 3;
9014 ts = 's';
9015 }
9016 else if (imm5 & 8) {
9017 src = getIReg64orZR(nn);
9018 laneNo = (imm5 >> 4) & 1;
9019 ts = 'd';
9020 }
9021 /* */
9022 if (src) {
9023 vassert(laneNo < 16);
9024 putQRegLane(dd, laneNo, src);
9025 DIP("ins %s.%c[%u], %s\n",
9026 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
9027 return True;
9028 }
9029 /* invalid */
9030 return False;
9031 }
9032
9033 /* -------- x,0,0101: SMOV -------- */
9034 /* -------- x,0,0111: UMOV -------- */
9035 /* 31 28 20 15 9 4
9036 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
9037 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
9038 dest is Xd when q==1, Wd when q==0
9039 UMOV:
9040 Ts,index,ops = case q:imm5 of
9041 0:xxxx1 -> B, xxxx, 8Uto64
9042 1:xxxx1 -> invalid
9043 0:xxx10 -> H, xxx, 16Uto64
9044 1:xxx10 -> invalid
9045 0:xx100 -> S, xx, 32Uto64
9046 1:xx100 -> invalid
9047 1:x1000 -> D, x, copy64
9048 other -> invalid
9049 SMOV:
9050 Ts,index,ops = case q:imm5 of
9051 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
9052 1:xxxx1 -> B, xxxx, 8Sto64
9053 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
9054 1:xxx10 -> H, xxx, 16Sto64
9055 0:xx100 -> invalid
9056 1:xx100 -> S, xx, 32Sto64
9057 1:x1000 -> invalid
9058 other -> invalid
9059 */
9060 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
9061 Bool isU = (imm4 & 2) == 2;
9062 const HChar* arTs = "??";
9063 UInt laneNo = 16; /* invalid */
9064 // Setting 'res' to non-NULL determines valid/invalid
9065 IRExpr* res = NULL;
9066 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
9067 laneNo = (imm5 >> 1) & 15;
9068 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9069 res = isU ? unop(Iop_8Uto64, lane)
9070 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
9071 arTs = "b";
9072 }
9073 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
9074 laneNo = (imm5 >> 1) & 15;
9075 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
9076 res = isU ? NULL
9077 : unop(Iop_8Sto64, lane);
9078 arTs = "b";
9079 }
9080 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
9081 laneNo = (imm5 >> 2) & 7;
9082 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9083 res = isU ? unop(Iop_16Uto64, lane)
9084 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
9085 arTs = "h";
9086 }
9087 else if (bitQ && (imm5 & 2)) { // 1:xxx10
9088 laneNo = (imm5 >> 2) & 7;
9089 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
9090 res = isU ? NULL
9091 : unop(Iop_16Sto64, lane);
9092 arTs = "h";
9093 }
9094 else if (!bitQ && (imm5 & 4)) { // 0:xx100
9095 laneNo = (imm5 >> 3) & 3;
9096 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9097 res = isU ? unop(Iop_32Uto64, lane)
9098 : NULL;
9099 arTs = "s";
9100 }
9101 else if (bitQ && (imm5 & 4)) { // 1:xx100
9102 laneNo = (imm5 >> 3) & 3;
9103 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
9104 res = isU ? NULL
9105 : unop(Iop_32Sto64, lane);
9106 arTs = "s";
9107 }
9108 else if (bitQ && (imm5 & 8)) { // 1:x1000
9109 laneNo = (imm5 >> 4) & 1;
9110 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
9111 res = isU ? lane
9112 : NULL;
9113 arTs = "d";
9114 }
9115 /* */
9116 if (res) {
9117 vassert(laneNo < 16);
9118 putIReg64orZR(dd, res);
9119 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
9120 nameIRegOrZR(bitQ == 1, dd),
9121 nameQReg128(nn), arTs, laneNo);
9122 return True;
9123 }
9124 /* invalid */
9125 return False;
9126 }
9127
9128 /* -------- 1,1,xxxx: INS (element) -------- */
9129 /* 31 28 20 14 9 4
9130 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
9131 where Ts,ix1,ix2
9132 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
9133 xxx10 -> H, xxx, imm4[3:1]
9134 xx100 -> S, xx, imm4[3:2]
9135 x1000 -> D, x, imm4[3:3]
9136 */
9137 if (bitQ == 1 && bitOP == 1) {
9138 HChar ts = '?';
9139 IRType ity = Ity_INVALID;
9140 UInt ix1 = 16;
9141 UInt ix2 = 16;
9142 if (imm5 & 1) {
9143 ts = 'b';
9144 ity = Ity_I8;
9145 ix1 = (imm5 >> 1) & 15;
9146 ix2 = (imm4 >> 0) & 15;
9147 }
9148 else if (imm5 & 2) {
9149 ts = 'h';
9150 ity = Ity_I16;
9151 ix1 = (imm5 >> 2) & 7;
9152 ix2 = (imm4 >> 1) & 7;
9153 }
9154 else if (imm5 & 4) {
9155 ts = 's';
9156 ity = Ity_I32;
9157 ix1 = (imm5 >> 3) & 3;
9158 ix2 = (imm4 >> 2) & 3;
9159 }
9160 else if (imm5 & 8) {
9161 ts = 'd';
9162 ity = Ity_I64;
9163 ix1 = (imm5 >> 4) & 1;
9164 ix2 = (imm4 >> 3) & 1;
9165 }
9166 /* */
9167 if (ity != Ity_INVALID) {
9168 vassert(ix1 < 16);
9169 vassert(ix2 < 16);
9170 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9171 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9172 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9173 return True;
9174 }
9175 /* invalid */
9176 return False;
9177 }
9178
9179 return False;
9180 # undef INSN
9181 }
9182
9183
9184 static
9185 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9186 {
9187 /* 31 28 18 15 11 9 4
9188 0q op 01111 00000 abc cmode 01 defgh d
9189 Decode fields: q,op,cmode
9190 Bit 11 is really "o2", but it is always zero.
9191 */
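/* Informal illustration of the immediate expansion used below: with
   op == 0, cmode == 1110, abcdefgh == 0x5A, AdvSIMDExpandImm is expected
   to give imm64 = 0x5A5A5A5A5A5A5A5A (MOVI 8-bit); with op == 1 and the
   same cmode, each of the eight bits becomes a whole byte of 0x00 or 0xFF
   (the 64-bit MOVI form). */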
9192 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9193 if (INSN(31,31) != 0
9194 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9195 || INSN(11,10) != BITS2(0,1)) {
9196 return False;
9197 }
9198 UInt bitQ = INSN(30,30);
9199 UInt bitOP = INSN(29,29);
9200 UInt cmode = INSN(15,12);
9201 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9202 UInt dd = INSN(4,0);
9203
9204 ULong imm64lo = 0;
9205 UInt op_cmode = (bitOP << 4) | cmode;
9206 Bool ok = False;
9207 Bool isORR = False;
9208 Bool isBIC = False;
9209 Bool isMOV = False;
9210 Bool isMVN = False;
9211 Bool isFMOV = False;
9212 switch (op_cmode) {
9213 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
9214 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
9215 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9216 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9217 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9218 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9219 ok = True; isMOV = True; break;
9220
9221 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9222 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9223 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9224 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9225 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9226 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9227 ok = True; isORR = True; break;
9228
9229 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9230 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9231 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9232 ok = True; isMOV = True; break;
9233
9234 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9235 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9236 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9237 ok = True; isORR = True; break;
9238
9239 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9240 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9241 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9242 ok = True; isMOV = True; break;
9243
9244 /* -------- x,0,1110 MOVI 8-bit -------- */
9245 case BITS5(0,1,1,1,0):
9246 ok = True; isMOV = True; break;
9247
9248 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9249 case BITS5(0,1,1,1,1): // 0:1111
9250 ok = True; isFMOV = True; break;
9251
9252 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9253 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9254 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9255 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9256 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9257 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9258 ok = True; isMVN = True; break;
9259
9260 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9261 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9262 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9263 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9264 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9265 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9266 ok = True; isBIC = True; break;
9267
9268 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9269 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9270 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9271 ok = True; isMVN = True; break;
9272
9273 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9274 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9275 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9276 ok = True; isBIC = True; break;
9277
9278 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9279 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9280 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9281 ok = True; isMVN = True; break;
9282
9283 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9284 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9285 case BITS5(1,1,1,1,0):
9286 ok = True; isMOV = True; break;
9287
9288 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
9289 case BITS5(1,1,1,1,1): // 1:1111
9290 ok = bitQ == 1; isFMOV = True; break;
9291
9292 default:
9293 break;
9294 }
9295 if (ok) {
9296 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9297 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
9298 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
9299 }
9300 if (ok) {
9301 if (isORR || isBIC) {
9302 ULong inv
9303 = isORR ? 0ULL : ~0ULL;
9304 IRExpr* immV128
9305 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9306 IRExpr* res
9307 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
9308 const HChar* nm = isORR ? "orr" : "bic";
9309 if (bitQ == 0) {
9310 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9311 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
9312 } else {
9313 putQReg128(dd, res);
9314 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9315 nameQReg128(dd), imm64lo, imm64lo);
9316 }
9317 }
9318 else if (isMOV || isMVN || isFMOV) {
9319 if (isMVN) imm64lo = ~imm64lo;
9320 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
9321 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9322 mkU64(imm64lo));
9323 putQReg128(dd, immV128);
9324 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9325 }
9326 return True;
9327 }
9328 /* else fall through */
9329
9330 return False;
9331 # undef INSN
9332 }
9333
9334
9335 static
9336 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9337 {
9338 /* 31 28 20 15 14 10 9 4
9339 01 op 11110000 imm5 0 imm4 1 n d
9340 Decode fields: op,imm4
9341 */
9342 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9343 if (INSN(31,30) != BITS2(0,1)
9344 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9345 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9346 return False;
9347 }
9348 UInt bitOP = INSN(29,29);
9349 UInt imm5 = INSN(20,16);
9350 UInt imm4 = INSN(14,11);
9351 UInt nn = INSN(9,5);
9352 UInt dd = INSN(4,0);
9353
9354 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9355 /* -------- 0,0000 DUP (element, scalar) -------- */
9356 IRTemp w0 = newTemp(Ity_I64);
9357 const HChar* arTs = "??";
9358 IRType laneTy = Ity_INVALID;
9359 UInt laneNo = 16; /* invalid */
9360 if (imm5 & 1) {
9361 arTs = "b";
9362 laneNo = (imm5 >> 1) & 15;
9363 laneTy = Ity_I8;
9364 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9365 }
9366 else if (imm5 & 2) {
9367 arTs = "h";
9368 laneNo = (imm5 >> 2) & 7;
9369 laneTy = Ity_I16;
9370 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9371 }
9372 else if (imm5 & 4) {
9373 arTs = "s";
9374 laneNo = (imm5 >> 3) & 3;
9375 laneTy = Ity_I32;
9376 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9377 }
9378 else if (imm5 & 8) {
9379 arTs = "d";
9380 laneNo = (imm5 >> 4) & 1;
9381 laneTy = Ity_I64;
9382 assign(w0, getQRegLane(nn, laneNo, laneTy));
9383 }
9384 else {
9385 /* invalid; leave laneTy unchanged. */
9386 }
9387 /* */
9388 if (laneTy != Ity_INVALID) {
9389 vassert(laneNo < 16);
9390 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9391 DIP("dup %s, %s.%s[%u]\n",
9392 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9393 return True;
9394 }
9395 /* else fall through */
9396 }
9397
9398 return False;
9399 # undef INSN
9400 }
9401
9402
9403 static
9404 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9405 {
9406 /* 31 28 23 21 16 11 9 4
9407 01 u 11110 sz 11000 opcode 10 n d
9408 Decode fields: u,sz,opcode
9409 */
9410 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9411 if (INSN(31,30) != BITS2(0,1)
9412 || INSN(28,24) != BITS5(1,1,1,1,0)
9413 || INSN(21,17) != BITS5(1,1,0,0,0)
9414 || INSN(11,10) != BITS2(1,0)) {
9415 return False;
9416 }
9417 UInt bitU = INSN(29,29);
9418 UInt sz = INSN(23,22);
9419 UInt opcode = INSN(16,12);
9420 UInt nn = INSN(9,5);
9421 UInt dd = INSN(4,0);
9422
9423 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9424 /* -------- 0,11,11011 ADDP d_2d -------- */
9425 IRTemp xy = newTempV128();
9426 IRTemp xx = newTempV128();
9427 assign(xy, getQReg128(nn));
9428 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9429 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9430 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9431 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9432 return True;
9433 }
9434
9435 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9436 /* -------- 1,00,01101 FADDP s_2s -------- */
9437 /* -------- 1,01,01101 FADDP d_2d -------- */
9438 Bool isD = sz == X01;
9439 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9440 IROp opADD = mkVecADDF(isD ? 3 : 2);
9441 IRTemp src = newTempV128();
9442 IRTemp argL = newTempV128();
9443 IRTemp argR = newTempV128();
9444 assign(src, getQReg128(nn));
9445 assign(argL, unop(opZHI, mkexpr(src)));
9446 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9447 mkU8(isD ? 8 : 4))));
9448 putQReg128(dd, unop(opZHI,
9449 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9450 mkexpr(argL), mkexpr(argR))));
9451 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9452 return True;
9453 }
9454
9455 if (bitU == 1
9456 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9457 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9458 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9459 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9460 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9461 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9462 Bool isD = (sz & 1) == 1;
9463 Bool isMIN = (sz & 2) == 2;
9464 Bool isNM = opcode == BITS5(0,1,1,0,0);
9465 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9466 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9467 IRTemp src = newTempV128();
9468 IRTemp argL = newTempV128();
9469 IRTemp argR = newTempV128();
9470 assign(src, getQReg128(nn));
9471 assign(argL, unop(opZHI, mkexpr(src)));
9472 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9473 mkU8(isD ? 8 : 4))));
9474 putQReg128(dd, unop(opZHI,
9475 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9476 HChar c = isD ? 'd' : 's';
9477 DIP("%s%sp %c%u, v%u.2%c\n",
9478 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9479 return True;
9480 }
9481
9482 return False;
9483 # undef INSN
9484 }
9485
9486
9487 static
9488 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9489 {
9490 /* 31 28 22 18 15 10 9 4
9491 01 u 111110 immh immb opcode 1 n d
9492 Decode fields: u,immh,opcode
9493 */
9494 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9495 if (INSN(31,30) != BITS2(0,1)
9496 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9497 return False;
9498 }
9499 UInt bitU = INSN(29,29);
9500 UInt immh = INSN(22,19);
9501 UInt immb = INSN(18,16);
9502 UInt opcode = INSN(15,11);
9503 UInt nn = INSN(9,5);
9504 UInt dd = INSN(4,0);
9505 UInt immhb = (immh << 3) | immb;
9506
9507 if ((immh & 8) == 8
9508 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9509 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9510 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9511 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9512 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
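/* Informal note on the encoding: right-shift amounts are encoded as
   (2 * lanebits) - shift, so for these d-sized forms sh = 128 - immhb;
   e.g. immh:immb == 1111:111 gives sh == 1 and immh:immb == 1000:000
   gives sh == 64. */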
9513 Bool isU = bitU == 1;
9514 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9515 UInt sh = 128 - immhb;
9516 vassert(sh >= 1 && sh <= 64);
9517 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9518 IRExpr* src = getQReg128(nn);
9519 IRTemp shf = newTempV128();
9520 IRTemp res = newTempV128();
9521 if (sh == 64 && isU) {
9522 assign(shf, mkV128(0x0000));
9523 } else {
9524 UInt nudge = 0;
9525 if (sh == 64) {
9526 vassert(!isU);
9527 nudge = 1;
9528 }
9529 assign(shf, binop(op, src, mkU8(sh - nudge)));
9530 }
9531 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9532 : mkexpr(shf));
9533 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9534 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9535 : (isU ? "ushr" : "sshr");
9536 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9537 return True;
9538 }
9539
9540 if ((immh & 8) == 8
9541 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9542 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9543 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9544 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9545 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9546 Bool isU = bitU == 1;
9547 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9548 UInt sh = 128 - immhb;
9549 vassert(sh >= 1 && sh <= 64);
9550 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9551 vassert(sh >= 1 && sh <= 64);
9552 IRExpr* src = getQReg128(nn);
9553 IRTemp imm8 = newTemp(Ity_I8);
9554 assign(imm8, mkU8((UChar)(-sh)));
9555 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9556 IRTemp shf = newTempV128();
9557 IRTemp res = newTempV128();
9558 assign(shf, binop(op, src, amt));
9559 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9560 : mkexpr(shf));
9561 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9562 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9563 : (isU ? "urshr" : "srshr");
9564 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9565 return True;
9566 }
9567
9568 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9569 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
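/* Worked example (informal): sri d0, d1, #8 makes nmask below equal to
   0xFF00000000000000, so the result keeps the top 8 bits of d0 and fills
   the remaining 56 bits with d1 >> 8. */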
9570 UInt sh = 128 - immhb;
9571 vassert(sh >= 1 && sh <= 64);
9572 if (sh == 64) {
9573 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9574 } else {
9575 /* sh is in range 1 .. 63 */
9576 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9577 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9578 IRTemp res = newTempV128();
9579 assign(res, binop(Iop_OrV128,
9580 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9581 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9582 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9583 }
9584 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9585 return True;
9586 }
9587
9588 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9589 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9590 UInt sh = immhb - 64;
9591 vassert(sh >= 0 && sh < 64);
9592 putQReg128(dd,
9593 unop(Iop_ZeroHI64ofV128,
9594 sh == 0 ? getQReg128(nn)
9595 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9596 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9597 return True;
9598 }
9599
9600 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9601 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9602 UInt sh = immhb - 64;
9603 vassert(sh >= 0 && sh < 64);
9604 if (sh == 0) {
9605 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9606 } else {
9607 /* sh is in range 1 .. 63 */
9608 ULong nmask = (1ULL << sh) - 1;
9609 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9610 IRTemp res = newTempV128();
9611 assign(res, binop(Iop_OrV128,
9612 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9613 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9614 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9615 }
9616 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9617 return True;
9618 }
9619
9620 if (opcode == BITS5(0,1,1,1,0)
9621 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9622 /* -------- 0,01110 SQSHL #imm -------- */
9623 /* -------- 1,01110 UQSHL #imm -------- */
9624 /* -------- 1,01100 SQSHLU #imm -------- */
9625 UInt size = 0;
9626 UInt shift = 0;
9627 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9628 if (!ok) return False;
9629 vassert(size >= 0 && size <= 3);
9630 /* The shift encoding has opposite sign for the leftwards case.
9631 Adjust shift to compensate. */
9632 UInt lanebits = 8 << size;
9633 shift = lanebits - shift;
9634 vassert(shift >= 0 && shift < lanebits);
9635 const HChar* nm = NULL;
9636 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9637 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9638 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9639 else vassert(0);
9640 IRTemp qDiff1 = IRTemp_INVALID;
9641 IRTemp qDiff2 = IRTemp_INVALID;
9642 IRTemp res = IRTemp_INVALID;
9643 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9644 /* This relies on the fact that the zeroed out lanes generate zeroed
9645 result lanes and don't saturate, so there's no point in trimming
9646 the resulting res, qDiff1 or qDiff2 values. */
9647 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9648 putQReg128(dd, mkexpr(res));
9649 updateQCFLAGwithDifference(qDiff1, qDiff2);
9650 const HChar arr = "bhsd"[size];
9651 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9652 return True;
9653 }
9654
9655 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9656 || (bitU == 1
9657 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9658 /* -------- 0,10010 SQSHRN #imm -------- */
9659 /* -------- 1,10010 UQSHRN #imm -------- */
9660 /* -------- 0,10011 SQRSHRN #imm -------- */
9661 /* -------- 1,10011 UQRSHRN #imm -------- */
9662 /* -------- 1,10000 SQSHRUN #imm -------- */
9663 /* -------- 1,10001 SQRSHRUN #imm -------- */
9664 UInt size = 0;
9665 UInt shift = 0;
9666 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9667 if (!ok || size == X11) return False;
9668 vassert(size >= X00 && size <= X10);
9669 vassert(shift >= 1 && shift <= (8 << size));
9670 const HChar* nm = "??";
9671 IROp op = Iop_INVALID;
9672 /* Decide on the name and the operation. */
9673 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9674 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9675 }
9676 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9677 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9678 }
9679 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9680 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9681 }
9682 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9683 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9684 }
9685 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9686 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9687 }
9688 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9689 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9690 }
9691 else vassert(0);
9692 /* Compute the result (Q, shifted value) pair. */
9693 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9694 IRTemp pair = newTempV128();
9695 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9696 /* Update the result reg */
9697 IRTemp res64in128 = newTempV128();
9698 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9699 putQReg128(dd, mkexpr(res64in128));
9700 /* Update the Q flag. */
9701 IRTemp q64q64 = newTempV128();
9702 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9703 IRTemp z128 = newTempV128();
9704 assign(z128, mkV128(0x0000));
9705 updateQCFLAGwithDifference(q64q64, z128);
9706 /* */
9707 const HChar arrNarrow = "bhsd"[size];
9708 const HChar arrWide = "bhsd"[size+1];
9709 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9710 return True;
9711 }
9712
9713 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9714 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9715 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
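/* Worked example (informal): scvtf d0, d1, #8 (fbits == 8) converts the
   signed 64-bit integer in d1 to double and scales by 2^-8, so an input
   of 640 yields 2.5. */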
9716 UInt size = 0;
9717 UInt fbits = 0;
9718 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9719 /* The following holds because immh is never zero. */
9720 vassert(ok);
9721 /* The following holds because immh >= 0100. */
9722 vassert(size == X10 || size == X11);
9723 Bool isD = size == X11;
9724 Bool isU = bitU == 1;
9725 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9726 Double scale = two_to_the_minus(fbits);
9727 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9728 : IRExpr_Const(IRConst_F32( (Float)scale ));
9729 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9730 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9731 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9732 IRType tyF = isD ? Ity_F64 : Ity_F32;
9733 IRType tyI = isD ? Ity_I64 : Ity_I32;
9734 IRTemp src = newTemp(tyI);
9735 IRTemp res = newTemp(tyF);
9736 IRTemp rm = mk_get_IR_rounding_mode();
9737 assign(src, getQRegLane(nn, 0, tyI));
9738 assign(res, triop(opMUL, mkexpr(rm),
9739 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9740 putQRegLane(dd, 0, mkexpr(res));
9741 if (!isD) {
9742 putQRegLane(dd, 1, mkU32(0));
9743 }
9744 putQRegLane(dd, 1, mkU64(0));
9745 const HChar ch = isD ? 'd' : 's';
9746 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9747 ch, dd, ch, nn, fbits);
9748 return True;
9749 }
9750
9751 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9752 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9753 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9754 UInt size = 0;
9755 UInt fbits = 0;
9756 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9757 /* The following holds because immh is never zero. */
9758 vassert(ok);
9759 /* The following holds because immh >= 0100. */
9760 vassert(size == X10 || size == X11);
9761 Bool isD = size == X11;
9762 Bool isU = bitU == 1;
9763 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9764 Double scale = two_to_the_plus(fbits);
9765 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9766 : IRExpr_Const(IRConst_F32( (Float)scale ));
9767 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9768 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9769 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9770 IRType tyF = isD ? Ity_F64 : Ity_F32;
9771 IRType tyI = isD ? Ity_I64 : Ity_I32;
9772 IRTemp src = newTemp(tyF);
9773 IRTemp res = newTemp(tyI);
9774 IRTemp rm = newTemp(Ity_I32);
9775 assign(src, getQRegLane(nn, 0, tyF));
9776 assign(rm, mkU32(Irrm_ZERO));
9777 assign(res, binop(opCVT, mkexpr(rm),
9778 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9779 putQRegLane(dd, 0, mkexpr(res));
9780 if (!isD) {
9781 putQRegLane(dd, 1, mkU32(0));
9782 }
9783 putQRegLane(dd, 1, mkU64(0));
9784 const HChar ch = isD ? 'd' : 's';
9785 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9786 ch, dd, ch, nn, fbits);
9787 return True;
9788 }
9789
9790 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9791 return False;
9792 # undef INSN
9793 }
9794
9795
9796 static
9797 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9798 {
9799 /* 31 29 28 23 21 20 15 11 9 4
9800 01 U 11110 size 1 m opcode 00 n d
9801 Decode fields: u,opcode
9802 */
9803 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9804 if (INSN(31,30) != BITS2(0,1)
9805 || INSN(28,24) != BITS5(1,1,1,1,0)
9806 || INSN(21,21) != 1
9807 || INSN(11,10) != BITS2(0,0)) {
9808 return False;
9809 }
9810 UInt bitU = INSN(29,29);
9811 UInt size = INSN(23,22);
9812 UInt mm = INSN(20,16);
9813 UInt opcode = INSN(15,12);
9814 UInt nn = INSN(9,5);
9815 UInt dd = INSN(4,0);
9816 vassert(size < 4);
9817
9818 if (bitU == 0
9819 && (opcode == BITS4(1,1,0,1)
9820 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9821 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9822 /* -------- 0,1001 SQDMLAL -------- */ // 1
9823 /* -------- 0,1011 SQDMLSL -------- */ // 2
9824 /* Widens, and size refers to the narrowed lanes. */
9825 UInt ks = 3;
9826 switch (opcode) {
9827 case BITS4(1,1,0,1): ks = 0; break;
9828 case BITS4(1,0,0,1): ks = 1; break;
9829 case BITS4(1,0,1,1): ks = 2; break;
9830 default: vassert(0);
9831 }
9832 vassert(ks >= 0 && ks <= 2);
9833 if (size == X00 || size == X11) return False;
9834 vassert(size <= 2);
9835 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9836 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9837 newTempsV128_3(&vecN, &vecM, &vecD);
9838 assign(vecN, getQReg128(nn));
9839 assign(vecM, getQReg128(mm));
9840 assign(vecD, getQReg128(dd));
9841 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9842 False/*!is2*/, size, "mas"[ks],
9843 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9844 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9845 putQReg128(dd, unop(opZHI, mkexpr(res)));
9846 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9847 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9848 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9849 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9850 }
9851 const HChar* nm = ks == 0 ? "sqdmull"
9852 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9853 const HChar arrNarrow = "bhsd"[size];
9854 const HChar arrWide = "bhsd"[size+1];
9855 DIP("%s %c%u, %c%u, %c%u\n",
9856 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9857 return True;
9858 }
9859
9860 return False;
9861 # undef INSN
9862 }
9863
9864
9865 static
9866 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9867 {
9868 /* 31 29 28 23 21 20 15 10 9 4
9869 01 U 11110 size 1 m opcode 1 n d
9870 Decode fields: u,size,opcode
9871 */
9872 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9873 if (INSN(31,30) != BITS2(0,1)
9874 || INSN(28,24) != BITS5(1,1,1,1,0)
9875 || INSN(21,21) != 1
9876 || INSN(10,10) != 1) {
9877 return False;
9878 }
9879 UInt bitU = INSN(29,29);
9880 UInt size = INSN(23,22);
9881 UInt mm = INSN(20,16);
9882 UInt opcode = INSN(15,11);
9883 UInt nn = INSN(9,5);
9884 UInt dd = INSN(4,0);
9885 vassert(size < 4);
9886
9887 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9888 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9889 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9890 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9891 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9892 Bool isADD = opcode == BITS5(0,0,0,0,1);
9893 Bool isU = bitU == 1;
9894 IROp qop = Iop_INVALID;
9895 IROp nop = Iop_INVALID;
9896 if (isADD) {
9897 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9898 nop = mkVecADD(size);
9899 } else {
9900 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9901 nop = mkVecSUB(size);
9902 }
9903 IRTemp argL = newTempV128();
9904 IRTemp argR = newTempV128();
9905 IRTemp qres = newTempV128();
9906 IRTemp nres = newTempV128();
9907 assign(argL, getQReg128(nn));
9908 assign(argR, getQReg128(mm));
9909 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9910 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9911 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9912 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9913 putQReg128(dd, mkexpr(qres));
9914 updateQCFLAGwithDifference(qres, nres);
9915 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9916 : (isU ? "uqsub" : "sqsub");
9917 const HChar arr = "bhsd"[size];
9918 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9919 return True;
9920 }
9921
9922 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9923 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9924 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9925 Bool isGT = bitU == 0;
9926 IRExpr* argL = getQReg128(nn);
9927 IRExpr* argR = getQReg128(mm);
9928 IRTemp res = newTempV128();
9929 assign(res,
9930 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9931 : binop(Iop_CmpGT64Ux2, argL, argR));
9932 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9933 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9934 nameQRegLO(dd, Ity_I64),
9935 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9936 return True;
9937 }
9938
9939 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9940 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9941 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9942 Bool isGE = bitU == 0;
9943 IRExpr* argL = getQReg128(nn);
9944 IRExpr* argR = getQReg128(mm);
9945 IRTemp res = newTempV128();
9946 assign(res,
9947 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9948 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9949 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9950 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9951 nameQRegLO(dd, Ity_I64),
9952 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9953 return True;
9954 }
9955
9956 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9957 || opcode == BITS5(0,1,0,1,0))) {
9958 /* -------- 0,xx,01000 SSHL d_d_d -------- */
9959 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9960 /* -------- 1,xx,01000 USHL d_d_d -------- */
9961 /* -------- 1,xx,01010 URSHL d_d_d -------- */
9962 Bool isU = bitU == 1;
9963 Bool isR = opcode == BITS5(0,1,0,1,0);
9964 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9965 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9966 IRTemp res = newTempV128();
9967 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9968 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9969 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9970 : (isU ? "ushl" : "sshl");
9971 DIP("%s %s, %s, %s\n", nm,
9972 nameQRegLO(dd, Ity_I64),
9973 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9974 return True;
9975 }
9976
9977 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9978 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
9979 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9980 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
9981 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9982 Bool isU = bitU == 1;
9983 Bool isR = opcode == BITS5(0,1,0,1,1);
9984 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9985 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9986 /* This is a bit tricky. Since we're only interested in the lowest
9987 lane of the result, we zero out all the rest in the operands, so
9988 as to ensure that other lanes don't pollute the returned Q value.
9989 This works because it means, for the lanes we don't care about, we
9990 are shifting zero by zero, which can never saturate. */
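/* Layout note, as relied on by the code below: these Iop_QandSQsh /
   Iop_QandUQsh style ops return a V256 whose lower V128 is the
   shifted result and whose upper V128 is non-zero in any lane that
   saturated. Comparing that upper half against an all-zeroes vector
   is therefore exactly the QC update required. */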
9991 IRTemp res256 = newTemp(Ity_V256);
9992 IRTemp resSH = newTempV128();
9993 IRTemp resQ = newTempV128();
9994 IRTemp zero = newTempV128();
9995 assign(
9996 res256,
9997 binop(op,
9998 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9999 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
10000 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
10001 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
10002 assign(zero, mkV128(0x0000));
10003 putQReg128(dd, mkexpr(resSH));
10004 updateQCFLAGwithDifference(resQ, zero);
10005 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
10006 : (isU ? "uqshl" : "sqshl");
10007 const HChar arr = "bhsd"[size];
10008 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10009 return True;
10010 }
10011
10012 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
10013 /* -------- 0,11,10000 ADD d_d_d -------- */
10014 /* -------- 1,11,10000 SUB d_d_d -------- */
10015 Bool isSUB = bitU == 1;
10016 IRTemp res = newTemp(Ity_I64);
10017 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
10018 getQRegLane(nn, 0, Ity_I64),
10019 getQRegLane(mm, 0, Ity_I64)));
10020 putQRegLane(dd, 0, mkexpr(res));
10021 putQRegLane(dd, 1, mkU64(0));
10022 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
10023 nameQRegLO(dd, Ity_I64),
10024 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10025 return True;
10026 }
10027
10028 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
10029 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
10030 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
10031 Bool isEQ = bitU == 1;
10032 IRExpr* argL = getQReg128(nn);
10033 IRExpr* argR = getQReg128(mm);
10034 IRTemp res = newTempV128();
10035 assign(res,
10036 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10037 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
10038 binop(Iop_AndV128, argL, argR),
10039 mkV128(0x0000))));
10040 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10041 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
10042 nameQRegLO(dd, Ity_I64),
10043 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
10044 return True;
10045 }
10046
10047 if (opcode == BITS5(1,0,1,1,0)) {
10048 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
10049 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
10050 if (size == X00 || size == X11) return False;
10051 Bool isR = bitU == 1;
10052 IRTemp res, sat1q, sat1n, vN, vM;
10053 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10054 newTempsV128_2(&vN, &vM);
10055 assign(vN, getQReg128(nn));
10056 assign(vM, getQReg128(mm));
10057 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10058 putQReg128(dd,
10059 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
10060 updateQCFLAGwithDifference(
10061 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
10062 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
10063 const HChar arr = "bhsd"[size];
10064 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10065 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
10066 return True;
10067 }
10068
10069 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
10070 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
10071 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
10072 IRTemp res = newTemp(ity);
10073 assign(res, unop(mkABSF(ity),
10074 triop(mkSUBF(ity),
10075 mkexpr(mk_get_IR_rounding_mode()),
10076 getQRegLO(nn,ity), getQRegLO(mm,ity))));
10077 putQReg128(dd, mkV128(0x0000));
10078 putQRegLO(dd, mkexpr(res));
10079 DIP("fabd %s, %s, %s\n",
10080 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10081 return True;
10082 }
10083
10084 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
10085 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
10086 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
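// (For reference: FMULX differs from FMUL only in that (+/-)0 times
// (+/-)Inf returns (+/-)2.0 instead of the default NaN result, which
// is why treating it as FMUL is only an approximation.)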
10087 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
10088 IRTemp res = newTemp(ity);
10089 assign(res, triop(mkMULF(ity),
10090 mkexpr(mk_get_IR_rounding_mode()),
10091 getQRegLO(nn,ity), getQRegLO(mm,ity)));
10092 putQReg128(dd, mkV128(0x0000));
10093 putQRegLO(dd, mkexpr(res));
10094 DIP("fmulx %s, %s, %s\n",
10095 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10096 return True;
10097 }
10098
10099 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
10100 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
10101 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
10102 Bool isD = size == X01;
10103 IRType ity = isD ? Ity_F64 : Ity_F32;
10104 Bool isGE = bitU == 1;
10105 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
10106 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
10107 IRTemp res = newTempV128();
10108 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
10109 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
10110 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10111 mkexpr(res))));
10112 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
10113 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10114 return True;
10115 }
10116
10117 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
10118 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
10119 Bool isD = size == X11;
10120 IRType ity = isD ? Ity_F64 : Ity_F32;
10121 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10122 IRTemp res = newTempV128();
10123 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
10124 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10125 mkexpr(res))));
10126 DIP("%s %s, %s, %s\n", "fcmgt",
10127 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10128 return True;
10129 }
10130
10131 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
10132 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
10133 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
10134 Bool isD = (size & 1) == 1;
10135 IRType ity = isD ? Ity_F64 : Ity_F32;
10136 Bool isGT = (size & 2) == 2;
10137 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
10138 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
10139 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
10140 IRTemp res = newTempV128();
10141 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
10142 unop(opABS, getQReg128(nn)))); // swapd
10143 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10144 mkexpr(res))));
10145 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
10146 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10147 return True;
10148 }
10149
10150 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
10151 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
10152 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
10153 Bool isSQRT = (size & 2) == 2;
10154 Bool isD = (size & 1) == 1;
10155 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
10156 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
10157 IRTemp res = newTempV128();
10158 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
10159 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10160 mkexpr(res))));
10161 HChar c = isD ? 'd' : 's';
10162 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
10163 c, dd, c, nn, c, mm);
10164 return True;
10165 }
10166
10167 return False;
10168 # undef INSN
10169 }
10170
10171
10172 static
10173 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10174 {
10175 /* 31 29 28 23 21 16 11 9 4
10176 01 U 11110 size 10000 opcode 10 n d
10177 Decode fields: u,size,opcode
10178 */
10179 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10180 if (INSN(31,30) != BITS2(0,1)
10181 || INSN(28,24) != BITS5(1,1,1,1,0)
10182 || INSN(21,17) != BITS5(1,0,0,0,0)
10183 || INSN(11,10) != BITS2(1,0)) {
10184 return False;
10185 }
10186 UInt bitU = INSN(29,29);
10187 UInt size = INSN(23,22);
10188 UInt opcode = INSN(16,12);
10189 UInt nn = INSN(9,5);
10190 UInt dd = INSN(4,0);
10191 vassert(size < 4);
10192
10193 if (opcode == BITS5(0,0,0,1,1)) {
10194 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10195 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10196 /* These are a bit tricky (to say the least). See comments on
10197 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10198 details. */
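/* In short: SUQADD accumulates an unsigned value (from nn) into a
   signed destination (dd) with signed saturation, and USQADD
   accumulates a signed value into an unsigned destination with
   unsigned saturation. E.g. for the B case, SUQADD of 0x90 (144)
   into 0x10 (16) saturates to 0x7F rather than wrapping to 0xA0. */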
10199 Bool isUSQADD = bitU == 1;
10200 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10201 : mkVecQADDEXTUSSATSS(size);
10202 IROp nop = mkVecADD(size);
10203 IRTemp argL = newTempV128();
10204 IRTemp argR = newTempV128();
10205 assign(argL, getQReg128(nn));
10206 assign(argR, getQReg128(dd));
10207 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10208 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10209 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10210 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10211 putQReg128(dd, mkexpr(qres));
10212 updateQCFLAGwithDifference(qres, nres);
10213 const HChar arr = "bhsd"[size];
10214 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10215 return True;
10216 }
10217
10218 if (opcode == BITS5(0,0,1,1,1)) {
10219 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
10220 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10221 Bool isNEG = bitU == 1;
10222 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10223 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10224 getQReg128(nn), size );
10225 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10226 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
10227 putQReg128(dd, mkexpr(qres));
10228 updateQCFLAGwithDifference(qres, nres);
10229 const HChar arr = "bhsd"[size];
10230 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
10231 return True;
10232 }
10233
10234 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10235 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10236 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10237 Bool isGT = bitU == 0;
10238 IRExpr* argL = getQReg128(nn);
10239 IRExpr* argR = mkV128(0x0000);
10240 IRTemp res = newTempV128();
10241 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10242 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10243 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10244 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10245 return True;
10246 }
10247
10248 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10249 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10250 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10251 Bool isEQ = bitU == 0;
10252 IRExpr* argL = getQReg128(nn);
10253 IRExpr* argR = mkV128(0x0000);
10254 IRTemp res = newTempV128();
10255 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10256 : unop(Iop_NotV128,
10257 binop(Iop_CmpGT64Sx2, argL, argR)));
10258 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10259 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10260 return True;
10261 }
10262
10263 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10264 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
10265 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10266 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10267 getQReg128(nn))));
10268 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
10269 return True;
10270 }
10271
10272 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10273 /* -------- 0,11,01011 ABS d_d -------- */
10274 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10275 unop(Iop_Abs64x2, getQReg128(nn))));
10276 DIP("abs d%u, d%u\n", dd, nn);
10277 return True;
10278 }
10279
10280 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10281 /* -------- 1,11,01011 NEG d_d -------- */
10282 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10283 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10284 DIP("neg d%u, d%u\n", dd, nn);
10285 return True;
10286 }
10287
10288 UInt ix = 0; /*INVALID*/
10289 if (size >= X10) {
10290 switch (opcode) {
10291 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10292 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10293 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10294 default: break;
10295 }
10296 }
10297 if (ix > 0) {
10298 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10299 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10300 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10301 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10302 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10303 Bool isD = size == X11;
10304 IRType ity = isD ? Ity_F64 : Ity_F32;
10305 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10306 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10307 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10308 IROp opCmp = Iop_INVALID;
10309 Bool swap = False;
10310 const HChar* nm = "??";
10311 switch (ix) {
10312 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10313 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10314 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10315 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10316 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10317 default: vassert(0);
10318 }
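/* The "swap" cases rewrite x > 0.0 as 0.0 < x and x >= 0.0 as
   0.0 <= x, so only the EQ/LE/LT comparison primops are needed. */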
10319 IRExpr* zero = mkV128(0x0000);
10320 IRTemp res = newTempV128();
10321 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10322 : binop(opCmp, getQReg128(nn), zero));
10323 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10324 mkexpr(res))));
10325
10326 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10327 return True;
10328 }
10329
10330 if (opcode == BITS5(1,0,1,0,0)
10331 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10332 /* -------- 0,xx,10100: SQXTN -------- */
10333 /* -------- 1,xx,10100: UQXTN -------- */
10334 /* -------- 1,xx,10010: SQXTUN -------- */
10335 if (size == X11) return False;
10336 vassert(size < 3);
10337 IROp opN = Iop_INVALID;
10338 Bool zWiden = True;
10339 const HChar* nm = "??";
10340 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10341 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10342 }
10343 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10344 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10345 }
10346 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10347 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10348 }
10349 else vassert(0);
10350 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10351 size+1, getQReg128(nn));
10352 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10353 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10354 putQReg128(dd, mkexpr(resN));
10355 /* This widens zero lanes to zero, and compares it against zero, so all
10356 of the non-participating lanes make no contribution to the
10357 Q flag state. */
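/* Worked example: sqxtn with a 16-bit source lane of 0x0123 (291)
   narrows (saturates) to 0x7F; sign-extending 0x7F back gives
   0x007F, which differs from 0x0123, so QC gets set. An in-range
   source such as 0x0012 narrows to 0x12 and widens back to 0x0012,
   leaving QC unchanged. */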
10358 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10359 size, mkexpr(resN));
10360 updateQCFLAGwithDifference(src, resW);
10361 const HChar arrNarrow = "bhsd"[size];
10362 const HChar arrWide = "bhsd"[size+1];
10363 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10364 return True;
10365 }
10366
10367 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10368 /* -------- 1,01,10110 FCVTXN s_d -------- */
10369 /* Using Irrm_NEAREST here isn't right. The docs say "round to
10370 odd" but I don't know what that really means. */
10371 putQRegLO(dd,
10372 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10373 getQRegLO(nn, Ity_F64)));
10374 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10375 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10376 DIP("fcvtxn s%u, d%u\n", dd, nn);
10377 return True;
10378 }
10379
10380 ix = 0; /*INVALID*/
10381 switch (opcode) {
10382 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10383 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10384 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10385 default: break;
10386 }
10387 if (ix > 0) {
10388 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10389 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10390 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10391 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10392 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10393 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10394 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10395 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10396 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10397 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10398 Bool isD = (size & 1) == 1;
10399 IRType tyF = isD ? Ity_F64 : Ity_F32;
10400 IRType tyI = isD ? Ity_I64 : Ity_I32;
10401 IRRoundingMode irrm = 8; /*impossible*/
10402 HChar ch = '?';
10403 switch (ix) {
10404 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10405 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10406 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10407 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10408 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10409 default: vassert(0);
10410 }
10411 IROp cvt = Iop_INVALID;
10412 if (bitU == 1) {
10413 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10414 } else {
10415 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10416 }
10417 IRTemp src = newTemp(tyF);
10418 IRTemp res = newTemp(tyI);
10419 assign(src, getQRegLane(nn, 0, tyF));
10420 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10421 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10422 if (!isD) {
10423 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10424 }
10425 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10426 HChar sOrD = isD ? 'd' : 's';
10427 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10428 sOrD, dd, sOrD, nn);
10429 return True;
10430 }
10431
10432 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10433 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10434 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10435 Bool isU = bitU == 1;
10436 Bool isD = (size & 1) == 1;
10437 IRType tyI = isD ? Ity_I64 : Ity_I32;
10438 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10439 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10440 IRTemp rm = mk_get_IR_rounding_mode();
10441 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10442 if (!isD) {
10443 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10444 }
10445 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10446 HChar c = isD ? 'd' : 's';
10447 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10448 return True;
10449 }
10450
10451 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10452 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10453 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10454 Bool isSQRT = bitU == 1;
10455 Bool isD = (size & 1) == 1;
10456 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10457 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10458 IRTemp resV = newTempV128();
10459 assign(resV, unop(op, getQReg128(nn)));
10460 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10461 mkexpr(resV))));
10462 HChar c = isD ? 'd' : 's';
10463 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10464 return True;
10465 }
10466
10467 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10468 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10469 Bool isD = (size & 1) == 1;
10470 IRType ty = isD ? Ity_F64 : Ity_F32;
10471 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10472 IRTemp res = newTemp(ty);
10473 IRTemp rm = mk_get_IR_rounding_mode();
10474 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10475 putQReg128(dd, mkV128(0x0000));
10476 putQRegLane(dd, 0, mkexpr(res));
10477 HChar c = isD ? 'd' : 's';
10478 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10479 return True;
10480 }
10481
10482 return False;
10483 # undef INSN
10484 }
10485
10486
10487 static
10488 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10489 {
10490 /* 31 28 23 21 20 19 15 11 9 4
10491 01 U 11111 size L M m opcode H 0 n d
10492 Decode fields are: u,size,opcode
10493 M is really part of the mm register number. Individual
10494 cases need to inspect L and H though.
10495 */
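/* For example, in the by-element cases below: for S-sized elements
   the index is H:L (0..3) and mm is M:Rm; for D-sized elements the
   index is just H (L must be 0); and for the H-sized SQDMULL/SQDMULH
   groups mm is only Rm, with M supplying the low bit of the index. */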
10496 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10497 if (INSN(31,30) != BITS2(0,1)
10498 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10499 return False;
10500 }
10501 UInt bitU = INSN(29,29);
10502 UInt size = INSN(23,22);
10503 UInt bitL = INSN(21,21);
10504 UInt bitM = INSN(20,20);
10505 UInt mmLO4 = INSN(19,16);
10506 UInt opcode = INSN(15,12);
10507 UInt bitH = INSN(11,11);
10508 UInt nn = INSN(9,5);
10509 UInt dd = INSN(4,0);
10510 vassert(size < 4);
10511 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10512
10513 if (bitU == 0 && size >= X10
10514 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10515 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10516 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10517 Bool isD = (size & 1) == 1;
10518 Bool isSUB = opcode == BITS4(0,1,0,1);
10519 UInt index;
10520 if (!isD) index = (bitH << 1) | bitL;
10521 else if (isD && bitL == 0) index = bitH;
10522 else return False; // sz:L == x11 => unallocated encoding
10523 vassert(index < (isD ? 2 : 4));
10524 IRType ity = isD ? Ity_F64 : Ity_F32;
10525 IRTemp elem = newTemp(ity);
10526 UInt mm = (bitM << 4) | mmLO4;
10527 assign(elem, getQRegLane(mm, index, ity));
10528 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10529 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10530 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10531 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10532 IRTemp rm = mk_get_IR_rounding_mode();
10533 IRTemp t1 = newTempV128();
10534 IRTemp t2 = newTempV128();
10535 // FIXME: double rounding; use FMA primops instead
10536 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10537 assign(t2, triop(isSUB ? opSUB : opADD,
10538 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10539 putQReg128(dd,
10540 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10541 mkexpr(t2))));
10542 const HChar c = isD ? 'd' : 's';
10543 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10544 c, dd, c, nn, nameQReg128(mm), c, index);
10545 return True;
10546 }
10547
10548 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10549 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10550 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10551 Bool isD = (size & 1) == 1;
10552 Bool isMULX = bitU == 1;
10553 UInt index;
10554 if (!isD) index = (bitH << 1) | bitL;
10555 else if (isD && bitL == 0) index = bitH;
10556 else return False; // sz:L == x11 => unallocated encoding
10557 vassert(index < (isD ? 2 : 4));
10558 IRType ity = isD ? Ity_F64 : Ity_F32;
10559 IRTemp elem = newTemp(ity);
10560 UInt mm = (bitM << 4) | mmLO4;
10561 assign(elem, getQRegLane(mm, index, ity));
10562 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10563 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10564 IRTemp rm = mk_get_IR_rounding_mode();
10565 IRTemp t1 = newTempV128();
10566 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10567 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10568 putQReg128(dd,
10569 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10570 mkexpr(t1))));
10571 const HChar c = isD ? 'd' : 's';
10572 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10573 c, dd, c, nn, nameQReg128(mm), c, index);
10574 return True;
10575 }
10576
10577 if (bitU == 0
10578 && (opcode == BITS4(1,0,1,1)
10579 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10580 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10581 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10582 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10583 /* Widens, and size refers to the narrowed lanes. */
10584 UInt ks = 3;
10585 switch (opcode) {
10586 case BITS4(1,0,1,1): ks = 0; break;
10587 case BITS4(0,0,1,1): ks = 1; break;
10588 case BITS4(0,1,1,1): ks = 2; break;
10589 default: vassert(0);
10590 }
10591 vassert(ks >= 0 && ks <= 2);
10592 UInt mm = 32; // invalid
10593 UInt ix = 16; // invalid
10594 switch (size) {
10595 case X00:
10596 return False; // h_b_b[] case is not allowed
10597 case X01:
10598 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10599 case X10:
10600 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10601 case X11:
10602 return False; // q_d_d[] case is not allowed
10603 default:
10604 vassert(0);
10605 }
10606 vassert(mm < 32 && ix < 16);
10607 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10608 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10609 newTempsV128_2(&vecN, &vecD);
10610 assign(vecN, getQReg128(nn));
10611 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10612 assign(vecD, getQReg128(dd));
10613 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10614 False/*!is2*/, size, "mas"[ks],
10615 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10616 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10617 putQReg128(dd, unop(opZHI, mkexpr(res)));
10618 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10619 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10620 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10621 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10622 }
10623 const HChar* nm = ks == 0 ? "sqdmull"
10624 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10625 const HChar arrNarrow = "bhsd"[size];
10626 const HChar arrWide = "bhsd"[size+1];
10627 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
10628 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10629 return True;
10630 }
10631
10632 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10633 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10634 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10635 UInt mm = 32; // invalid
10636 UInt ix = 16; // invalid
10637 switch (size) {
10638 case X00:
10639 return False; // b case is not allowed
10640 case X01:
10641 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10642 case X10:
10643 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10644 case X11:
10645 return False; // q case is not allowed
10646 default:
10647 vassert(0);
10648 }
10649 vassert(mm < 32 && ix < 16);
10650 Bool isR = opcode == BITS4(1,1,0,1);
10651 IRTemp res, sat1q, sat1n, vN, vM;
10652 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10653 vN = newTempV128();
10654 assign(vN, getQReg128(nn));
10655 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10656 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10657 IROp opZHI = mkVecZEROHIxxOFV128(size);
10658 putQReg128(dd, unop(opZHI, mkexpr(res)));
10659 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10660 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10661 HChar ch = size == X01 ? 'h' : 's';
10662 DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, (Int)dd, ix);
10663 return True;
10664 }
10665
10666 return False;
10667 # undef INSN
10668 }
10669
10670
10671 static
10672 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10673 {
10674 /* 31 28 22 18 15 10 9 4
10675 0 q u 011110 immh immb opcode 1 n d
10676 Decode fields: u,opcode
10677 */
10678 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10679 if (INSN(31,31) != 0
10680 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10681 return False;
10682 }
10683 UInt bitQ = INSN(30,30);
10684 UInt bitU = INSN(29,29);
10685 UInt immh = INSN(22,19);
10686 UInt immb = INSN(18,16);
10687 UInt opcode = INSN(15,11);
10688 UInt nn = INSN(9,5);
10689 UInt dd = INSN(4,0);
10690
10691 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10692 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10693 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10694 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10695 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10696 /* laneTy, shift = case immh:immb of
10697 0001:xxx -> B, SHR:8-xxx
10698 001x:xxx -> H, SHR:16-xxxx
10699 01xx:xxx -> S, SHR:32-xxxxx
10700 1xxx:xxx -> D, SHR:64-xxxxxx
10701 other -> invalid
10702 */
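/* Worked example: immh:immb = 0101:110 selects S lanes and a shift
   of 32 - 0b01110 = 18, so (with Q=1) "sshr v0.4s, v1.4s, #18"
   arrives here with size == X10 and shift == 18. */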
10703 UInt size = 0;
10704 UInt shift = 0;
10705 Bool isQ = bitQ == 1;
10706 Bool isU = bitU == 1;
10707 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10708 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10709 if (!ok || (bitQ == 0 && size == X11)) return False;
10710 vassert(size >= 0 && size <= 3);
10711 UInt lanebits = 8 << size;
10712 vassert(shift >= 1 && shift <= lanebits);
10713 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10714 IRExpr* src = getQReg128(nn);
10715 IRTemp shf = newTempV128();
10716 IRTemp res = newTempV128();
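/* A shift equal to the full lane width can't be expressed directly
   by the IR shift ops. Unsigned: the result is simply zero. Signed:
   shifting by lanebits-1 instead (the "nudge") still replicates the
   sign bit into every result bit, which is the required behaviour. */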
10717 if (shift == lanebits && isU) {
10718 assign(shf, mkV128(0x0000));
10719 } else {
10720 UInt nudge = 0;
10721 if (shift == lanebits) {
10722 vassert(!isU);
10723 nudge = 1;
10724 }
10725 assign(shf, binop(op, src, mkU8(shift - nudge)));
10726 }
10727 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10728 : mkexpr(shf));
10729 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10730 HChar laneCh = "bhsd"[size];
10731 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10732 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10733 : (isU ? "ushr" : "sshr");
10734 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10735 nameQReg128(dd), nLanes, laneCh,
10736 nameQReg128(nn), nLanes, laneCh, shift);
10737 return True;
10738 }
10739
10740 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10741 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10742 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10743 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10744 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10745 /* laneTy, shift = case immh:immb of
10746 0001:xxx -> B, SHR:8-xxx
10747 001x:xxx -> H, SHR:16-xxxx
10748 01xx:xxx -> S, SHR:32-xxxxx
10749 1xxx:xxx -> D, SHR:64-xxxxxx
10750 other -> invalid
10751 */
10752 UInt size = 0;
10753 UInt shift = 0;
10754 Bool isQ = bitQ == 1;
10755 Bool isU = bitU == 1;
10756 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10757 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10758 if (!ok || (bitQ == 0 && size == X11)) return False;
10759 vassert(size >= 0 && size <= 3);
10760 UInt lanebits = 8 << size;
10761 vassert(shift >= 1 && shift <= lanebits);
10762 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10763 IRExpr* src = getQReg128(nn);
10764 IRTemp imm8 = newTemp(Ity_I8);
10765 assign(imm8, mkU8((UChar)(-shift)));
10766 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10767 IRTemp shf = newTempV128();
10768 IRTemp res = newTempV128();
10769 assign(shf, binop(op, src, amt));
10770 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10771 : mkexpr(shf));
10772 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10773 HChar laneCh = "bhsd"[size];
10774 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10775 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10776 : (isU ? "urshr" : "srshr");
10777 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10778 nameQReg128(dd), nLanes, laneCh,
10779 nameQReg128(nn), nLanes, laneCh, shift);
10780 return True;
10781 }
10782
10783 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10784 /* -------- 1,01000 SRI std7_std7_#imm -------- */
10785 /* laneTy, shift = case immh:immb of
10786 0001:xxx -> B, SHR:8-xxx
10787 001x:xxx -> H, SHR:16-xxxx
10788 01xx:xxx -> S, SHR:32-xxxxx
10789 1xxx:xxx -> D, SHR:64-xxxxxx
10790 other -> invalid
10791 */
10792 UInt size = 0;
10793 UInt shift = 0;
10794 Bool isQ = bitQ == 1;
10795 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10796 if (!ok || (bitQ == 0 && size == X11)) return False;
10797 vassert(size >= 0 && size <= 3);
10798 UInt lanebits = 8 << size;
10799 vassert(shift >= 1 && shift <= lanebits);
10800 IRExpr* src = getQReg128(nn);
10801 IRTemp res = newTempV128();
10802 if (shift == lanebits) {
10803 assign(res, getQReg128(dd));
10804 } else {
10805 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10806 IRExpr* nmask = binop(mkVecSHLN(size),
10807 mkV128(0xFFFF), mkU8(lanebits - shift));
10808 IRTemp tmp = newTempV128();
10809 assign(tmp, binop(Iop_OrV128,
10810 mkexpr(res),
10811 binop(Iop_AndV128, getQReg128(dd), nmask)));
10812 res = tmp;
10813 }
10814 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10815 HChar laneCh = "bhsd"[size];
10816 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10817 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10818 nameQReg128(dd), nLanes, laneCh,
10819 nameQReg128(nn), nLanes, laneCh, shift);
10820 return True;
10821 }
10822
10823 if (opcode == BITS5(0,1,0,1,0)) {
10824 /* -------- 0,01010 SHL std7_std7_#imm -------- */
10825 /* -------- 1,01010 SLI std7_std7_#imm -------- */
10826 /* laneTy, shift = case immh:immb of
10827 0001:xxx -> B, xxx
10828 001x:xxx -> H, xxxx
10829 01xx:xxx -> S, xxxxx
10830 1xxx:xxx -> D, xxxxxx
10831 other -> invalid
10832 */
10833 UInt size = 0;
10834 UInt shift = 0;
10835 Bool isSLI = bitU == 1;
10836 Bool isQ = bitQ == 1;
10837 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10838 if (!ok || (bitQ == 0 && size == X11)) return False;
10839 vassert(size >= 0 && size <= 3);
10840 /* The shift encoding has opposite sign for the leftwards case.
10841 Adjust shift to compensate. */
10842 UInt lanebits = 8 << size;
10843 shift = lanebits - shift;
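/* Example: "shl v0.4s, v1.4s, #18" is encoded with immh:immb =
   0110:010 (50), for which getLaneInfo_IMMH_IMMB returns the
   right-shift-style value 64 - 50 = 14; the line above recovers the
   true left-shift amount 32 - 14 = 18. */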
10844 vassert(shift >= 0 && shift < lanebits);
10845 IROp op = mkVecSHLN(size);
10846 IRExpr* src = getQReg128(nn);
10847 IRTemp res = newTempV128();
10848 if (shift == 0) {
10849 assign(res, src);
10850 } else {
10851 assign(res, binop(op, src, mkU8(shift)));
10852 if (isSLI) {
10853 IRExpr* nmask = binop(mkVecSHRN(size),
10854 mkV128(0xFFFF), mkU8(lanebits - shift));
10855 IRTemp tmp = newTempV128();
10856 assign(tmp, binop(Iop_OrV128,
10857 mkexpr(res),
10858 binop(Iop_AndV128, getQReg128(dd), nmask)));
10859 res = tmp;
10860 }
10861 }
10862 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10863 HChar laneCh = "bhsd"[size];
10864 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10865 const HChar* nm = isSLI ? "sli" : "shl";
10866 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10867 nameQReg128(dd), nLanes, laneCh,
10868 nameQReg128(nn), nLanes, laneCh, shift);
10869 return True;
10870 }
10871
10872 if (opcode == BITS5(0,1,1,1,0)
10873 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10874 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10875 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10876 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10877 UInt size = 0;
10878 UInt shift = 0;
10879 Bool isQ = bitQ == 1;
10880 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10881 if (!ok || (bitQ == 0 && size == X11)) return False;
10882 vassert(size >= 0 && size <= 3);
10883 /* The shift encoding has opposite sign for the leftwards case.
10884 Adjust shift to compensate. */
10885 UInt lanebits = 8 << size;
10886 shift = lanebits - shift;
10887 vassert(shift >= 0 && shift < lanebits);
10888 const HChar* nm = NULL;
10889 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10890 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10891 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10892 else vassert(0);
10893 IRTemp qDiff1 = IRTemp_INVALID;
10894 IRTemp qDiff2 = IRTemp_INVALID;
10895 IRTemp res = IRTemp_INVALID;
10896 IRTemp src = newTempV128();
10897 assign(src, getQReg128(nn));
10898 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10899 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10900 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10901 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10902 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10903 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10904 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10905 return True;
10906 }
10907
10908 if (bitU == 0
10909 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10910 /* -------- 0,10000 SHRN{,2} #imm -------- */
10911 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10912 /* Narrows, and size is the narrow size. */
10913 UInt size = 0;
10914 UInt shift = 0;
10915 Bool is2 = bitQ == 1;
10916 Bool isR = opcode == BITS5(1,0,0,0,1);
10917 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10918 if (!ok || size == X11) return False;
10919 vassert(shift >= 1);
10920 IRTemp t1 = newTempV128();
10921 IRTemp t2 = newTempV128();
10922 IRTemp t3 = newTempV128();
10923 assign(t1, getQReg128(nn));
10924 assign(t2, isR ? binop(mkVecADD(size+1),
10925 mkexpr(t1),
10926 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10927 : mkexpr(t1));
10928 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10929 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10930 putLO64andZUorPutHI64(is2, dd, t4);
10931 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10932 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10933 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10934 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10935 return True;
10936 }
10937
10938 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10939 || (bitU == 1
10940 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10941 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
10942 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
10943 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
10944 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
10945 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
10946 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10947 UInt size = 0;
10948 UInt shift = 0;
10949 Bool is2 = bitQ == 1;
10950 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10951 if (!ok || size == X11) return False;
10952 vassert(shift >= 1 && shift <= (8 << size));
10953 const HChar* nm = "??";
10954 IROp op = Iop_INVALID;
10955 /* Decide on the name and the operation. */
10956 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10957 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10958 }
10959 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10960 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10961 }
10962 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10963 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10964 }
10965 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10966 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10967 }
10968 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10969 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10970 }
10971 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10972 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10973 }
10974 else vassert(0);
10975 /* Compute the result (Q, shifted value) pair. */
10976 IRTemp src128 = newTempV128();
10977 assign(src128, getQReg128(nn));
10978 IRTemp pair = newTempV128();
10979 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10980 /* Update the result reg */
10981 IRTemp res64in128 = newTempV128();
10982 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10983 putLO64andZUorPutHI64(is2, dd, res64in128);
10984 /* Update the Q flag. */
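/* The Iop_QandQ*NARROW ops used above return the narrowed result in
   the lower 64 bits and a saturation indication in the upper 64
   bits. Replicating that upper half into both halves and comparing
   against an all-zeroes vector folds any saturation into QC. */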
10985 IRTemp q64q64 = newTempV128();
10986 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10987 IRTemp z128 = newTempV128();
10988 assign(z128, mkV128(0x0000));
10989 updateQCFLAGwithDifference(q64q64, z128);
10990 /* */
10991 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10992 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10993 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10994 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10995 return True;
10996 }
10997
10998 if (opcode == BITS5(1,0,1,0,0)) {
10999 /* -------- 0,10100 SSHLL{,2} #imm -------- */
11000 /* -------- 1,10100 USHLL{,2} #imm -------- */
11001 /* 31 28 22 18 15 9 4
11002 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
11003 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
11004 where Ta,Tb,sh
11005 = case immh of 1xxx -> invalid
11006 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
11007 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
11008 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
11009 0000 -> AdvSIMD modified immediate (???)
11010 */
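/* The scheme below widens by interleaving the source with zeroes, so
   each narrow element lands in the top half of a wide lane, and then
   shifts right (arithmetically for SSHLL, logically for USHLL) by
   lane-width minus sh. E.g. "sshll v0.4s, v1.4h, #3" (immh:immb =
   0010:011) shifts the widened lanes right by 16-3 = 13, giving each
   element sign-extended and multiplied by 8. */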
11011 Bool isQ = bitQ == 1;
11012 Bool isU = bitU == 1;
11013 UInt immhb = (immh << 3) | immb;
11014 IRTemp src = newTempV128();
11015 IRTemp zero = newTempV128();
11016 IRExpr* res = NULL;
11017 UInt sh = 0;
11018 const HChar* ta = "??";
11019 const HChar* tb = "??";
11020 assign(src, getQReg128(nn));
11021 assign(zero, mkV128(0x0000));
11022 if (immh & 8) {
11023 /* invalid; don't assign to res */
11024 }
11025 else if (immh & 4) {
11026 sh = immhb - 32;
11027 vassert(sh < 32); /* so 32-sh is 1..32 */
11028 ta = "2d";
11029 tb = isQ ? "4s" : "2s";
11030 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
11031 : mk_InterleaveLO32x4(src, zero);
11032 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
11033 }
11034 else if (immh & 2) {
11035 sh = immhb - 16;
11036 vassert(sh < 16); /* so 16-sh is 1..16 */
11037 ta = "4s";
11038 tb = isQ ? "8h" : "4h";
11039 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
11040 : mk_InterleaveLO16x8(src, zero);
11041 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
11042 }
11043 else if (immh & 1) {
11044 sh = immhb - 8;
11045 vassert(sh < 8); /* so 8-sh is 1..8 */
11046 ta = "8h";
11047 tb = isQ ? "16b" : "8b";
11048 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
11049 : mk_InterleaveLO8x16(src, zero);
11050 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
11051 } else {
11052 vassert(immh == 0);
11053 /* invalid; don't assign to res */
11054 }
11055 /* */
11056 if (res) {
11057 putQReg128(dd, res);
11058 DIP("%cshll%s %s.%s, %s.%s, #%u\n",
11059 isU ? 'u' : 's', isQ ? "2" : "",
11060 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
11061 return True;
11062 }
11063 return False;
11064 }
11065
11066 if (opcode == BITS5(1,1,1,0,0)) {
11067 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11068 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
11069 /* If immh is of the form 00xx, the insn is invalid. */
11070 if (immh < BITS4(0,1,0,0)) return False;
11071 UInt size = 0;
11072 UInt fbits = 0;
11073 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11074 /* The following holds because immh is never zero. */
11075 vassert(ok);
11076 /* The following holds because immh >= 0100. */
11077 vassert(size == X10 || size == X11);
11078 Bool isD = size == X11;
11079 Bool isU = bitU == 1;
11080 Bool isQ = bitQ == 1;
11081 if (isD && !isQ) return False; /* reject .1d case */
11082 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11083 Double scale = two_to_the_minus(fbits);
11084 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11085 : IRExpr_Const(IRConst_F32( (Float)scale ));
11086 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11087 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
11088 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
11089 IRType tyF = isD ? Ity_F64 : Ity_F32;
11090 IRType tyI = isD ? Ity_I64 : Ity_I32;
11091 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11092 vassert(nLanes == 2 || nLanes == 4);
11093 for (UInt i = 0; i < nLanes; i++) {
11094 IRTemp src = newTemp(tyI);
11095 IRTemp res = newTemp(tyF);
11096 IRTemp rm = mk_get_IR_rounding_mode();
11097 assign(src, getQRegLane(nn, i, tyI));
11098 assign(res, triop(opMUL, mkexpr(rm),
11099 binop(opCVT, mkexpr(rm), mkexpr(src)),
11100 scaleE));
11101 putQRegLane(dd, i, mkexpr(res));
11102 }
11103 if (!isQ) {
11104 putQRegLane(dd, 1, mkU64(0));
11105 }
11106 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11107 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
11108 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11109 return True;
11110 }
11111
11112 if (opcode == BITS5(1,1,1,1,1)) {
11113 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
11114 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
11115 /* If immh is of the form 00xx, the insn is invalid. */
11116 if (immh < BITS4(0,1,0,0)) return False;
11117 UInt size = 0;
11118 UInt fbits = 0;
11119 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
11120 /* The following holds because immh is never zero. */
11121 vassert(ok);
11122 /* The following holds because immh >= 0100. */
11123 vassert(size == X10 || size == X11);
11124 Bool isD = size == X11;
11125 Bool isU = bitU == 1;
11126 Bool isQ = bitQ == 1;
11127 if (isD && !isQ) return False; /* reject .1d case */
11128 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
11129 Double scale = two_to_the_plus(fbits);
11130 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
11131 : IRExpr_Const(IRConst_F32( (Float)scale ));
11132 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
11133 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
11134 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
11135 IRType tyF = isD ? Ity_F64 : Ity_F32;
11136 IRType tyI = isD ? Ity_I64 : Ity_I32;
11137 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
11138 vassert(nLanes == 2 || nLanes == 4);
11139 for (UInt i = 0; i < nLanes; i++) {
11140 IRTemp src = newTemp(tyF);
11141 IRTemp res = newTemp(tyI);
11142 IRTemp rm = newTemp(Ity_I32);
11143 assign(src, getQRegLane(nn, i, tyF));
11144 assign(rm, mkU32(Irrm_ZERO));
11145 assign(res, binop(opCVT, mkexpr(rm),
11146 triop(opMUL, mkexpr(rm),
11147 mkexpr(src), scaleE)));
11148 putQRegLane(dd, i, mkexpr(res));
11149 }
11150 if (!isQ) {
11151 putQRegLane(dd, 1, mkU64(0));
11152 }
11153 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11154 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
11155 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
11156 return True;
11157 }
11158
11159 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11160 return False;
11161 # undef INSN
11162 }
11163
11164
11165 static
11166 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
11167 {
11168 /* 31 30 29 28 23 21 20 15 11 9 4
11169 0 Q U 01110 size 1 m opcode 00 n d
11170 Decode fields: u,opcode
11171 */
11172 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11173 if (INSN(31,31) != 0
11174 || INSN(28,24) != BITS5(0,1,1,1,0)
11175 || INSN(21,21) != 1
11176 || INSN(11,10) != BITS2(0,0)) {
11177 return False;
11178 }
11179 UInt bitQ = INSN(30,30);
11180 UInt bitU = INSN(29,29);
11181 UInt size = INSN(23,22);
11182 UInt mm = INSN(20,16);
11183 UInt opcode = INSN(15,12);
11184 UInt nn = INSN(9,5);
11185 UInt dd = INSN(4,0);
11186 vassert(size < 4);
11187 Bool is2 = bitQ == 1;
11188
11189 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
11190 /* -------- 0,0000 SADDL{2} -------- */
11191 /* -------- 1,0000 UADDL{2} -------- */
11192 /* -------- 0,0010 SSUBL{2} -------- */
11193 /* -------- 1,0010 USUBL{2} -------- */
11194 /* Widens, and size refers to the narrow lanes. */
11195 if (size == X11) return False;
11196 vassert(size <= 2);
11197 Bool isU = bitU == 1;
11198 Bool isADD = opcode == BITS4(0,0,0,0);
11199 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11200 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11201 IRTemp res = newTempV128();
11202 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11203 mkexpr(argL), mkexpr(argR)));
11204 putQReg128(dd, mkexpr(res));
11205 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11206 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11207 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
11208 : (isU ? "usubl" : "ssubl");
11209 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11210 nameQReg128(dd), arrWide,
11211 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11212 return True;
11213 }
11214
11215 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
11216 /* -------- 0,0001 SADDW{2} -------- */
11217 /* -------- 1,0001 UADDW{2} -------- */
11218 /* -------- 0,0011 SSUBW{2} -------- */
11219 /* -------- 1,0011 USUBW{2} -------- */
11220 /* Widens, and size refers to the narrow lanes. */
11221 if (size == X11) return False;
11222 vassert(size <= 2);
11223 Bool isU = bitU == 1;
11224 Bool isADD = opcode == BITS4(0,0,0,1);
11225 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11226 IRTemp res = newTempV128();
11227 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11228 getQReg128(nn), mkexpr(argR)));
11229 putQReg128(dd, mkexpr(res));
11230 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11231 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11232 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
11233 : (isU ? "usubw" : "ssubw");
11234 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11235 nameQReg128(dd), arrWide,
11236 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
11237 return True;
11238 }
11239
11240 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
11241 /* -------- 0,0100 ADDHN{2} -------- */
11242 /* -------- 1,0100 RADDHN{2} -------- */
11243 /* -------- 0,0110 SUBHN{2} -------- */
11244 /* -------- 1,0110 RSUBHN{2} -------- */
11245 /* Narrows, and size refers to the narrowed lanes. */
11246 if (size == X11) return False;
11247 vassert(size <= 2);
11248 const UInt shift[3] = { 8, 16, 32 };
11249 Bool isADD = opcode == BITS4(0,1,0,0);
11250 Bool isR = bitU == 1;
11251 /* Combined elements in wide lanes */
11252 IRTemp wide = newTempV128();
11253 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
11254 getQReg128(nn), getQReg128(mm));
11255 if (isR) {
11256 wideE = binop(mkVecADD(size+1),
11257 wideE,
11258 mkexpr(math_VEC_DUP_IMM(size+1,
11259 1ULL << (shift[size]-1))));
11260 }
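/* The rounding forms add half a narrow-lane ulp before the high half
   is taken. For the H -> B case (size == 0) that constant is 0x80,
   so e.g. raddhn on a wide lane of 0x1280 yields 0x13 where addhn
   would give 0x12. */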
11261 assign(wide, wideE);
11262 /* Top halves of elements, still in wide lanes */
11263 IRTemp shrd = newTempV128();
11264 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
11265 /* Elements now compacted into lower 64 bits */
11266 IRTemp new64 = newTempV128();
11267 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
11268 putLO64andZUorPutHI64(is2, dd, new64);
11269 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11270 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11271 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
11272 : (isR ? "rsubhn" : "subhn");
11273 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11274 nameQReg128(dd), arrNarrow,
11275 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11276 return True;
11277 }
11278
11279 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
11280 /* -------- 0,0101 SABAL{2} -------- */
11281 /* -------- 1,0101 UABAL{2} -------- */
11282 /* -------- 0,0111 SABDL{2} -------- */
11283 /* -------- 1,0111 UABDL{2} -------- */
11284 /* Widens, and size refers to the narrow lanes. */
11285 if (size == X11) return False;
11286 vassert(size <= 2);
11287 Bool isU = bitU == 1;
11288 Bool isACC = opcode == BITS4(0,1,0,1);
11289 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
11290 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
11291 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
11292 IRTemp res = newTempV128();
11293 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
11294 : mkexpr(abd));
11295 putQReg128(dd, mkexpr(res));
11296 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11297 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11298 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
11299 : (isU ? "uabdl" : "sabdl");
11300 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11301 nameQReg128(dd), arrWide,
11302 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11303 return True;
11304 }
11305
11306 if (opcode == BITS4(1,1,0,0)
11307 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
11308 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
11309 /* -------- 1,1100 UMULL{2} -------- */ // 0
11310 /* -------- 0,1000 SMLAL{2} -------- */ // 1
11311 /* -------- 1,1000 UMLAL{2} -------- */ // 1
11312 /* -------- 0,1010 SMLSL{2} -------- */ // 2
11313 /* -------- 1,1010 UMLSL{2} -------- */ // 2
11314 /* Widens, and size refers to the narrow lanes. */
11315 UInt ks = 3;
11316 switch (opcode) {
11317 case BITS4(1,1,0,0): ks = 0; break;
11318 case BITS4(1,0,0,0): ks = 1; break;
11319 case BITS4(1,0,1,0): ks = 2; break;
11320 default: vassert(0);
11321 }
11322 vassert(ks >= 0 && ks <= 2);
11323 if (size == X11) return False;
11324 vassert(size <= 2);
11325 Bool isU = bitU == 1;
11326 IRTemp vecN = newTempV128();
11327 IRTemp vecM = newTempV128();
11328 IRTemp vecD = newTempV128();
11329 assign(vecN, getQReg128(nn));
11330 assign(vecM, getQReg128(mm));
11331 assign(vecD, getQReg128(dd));
11332 IRTemp res = IRTemp_INVALID;
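/* The character argument "mas"[ks] selects the variant handed to the
   helper: 'm' plain widening multiply, 'a' multiply-accumulate,
   's' multiply-subtract. */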
11333 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
11334 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11335 putQReg128(dd, mkexpr(res));
11336 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11337 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11338 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
11339 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
11340 nameQReg128(dd), arrWide,
11341 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11342 return True;
11343 }
11344
11345 if (bitU == 0
11346 && (opcode == BITS4(1,1,0,1)
11347 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
11348 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
11349 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
11350 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
11351 /* Widens, and size refers to the narrow lanes. */
11352 UInt ks = 3;
11353 switch (opcode) {
11354 case BITS4(1,1,0,1): ks = 0; break;
11355 case BITS4(1,0,0,1): ks = 1; break;
11356 case BITS4(1,0,1,1): ks = 2; break;
11357 default: vassert(0);
11358 }
11359 vassert(ks >= 0 && ks <= 2);
11360 if (size == X00 || size == X11) return False;
11361 vassert(size <= 2);
11362 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11363 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11364 newTempsV128_3(&vecN, &vecM, &vecD);
11365 assign(vecN, getQReg128(nn));
11366 assign(vecM, getQReg128(mm));
11367 assign(vecD, getQReg128(dd));
11368 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11369 is2, size, "mas"[ks],
11370 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11371 putQReg128(dd, mkexpr(res));
11372 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11373 updateQCFLAGwithDifference(sat1q, sat1n);
11374 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11375 updateQCFLAGwithDifference(sat2q, sat2n);
11376 }
11377 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11378 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11379 const HChar* nm = ks == 0 ? "sqdmull"
11380 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11381 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11382 nameQReg128(dd), arrWide,
11383 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11384 return True;
11385 }
11386
11387 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11388 /* -------- 0,1110 PMULL{2} -------- */
11389 /* Widens, and size refers to the narrow lanes. */
11390 if (size != X00 && size != X11) return False;
11391 IRTemp res = IRTemp_INVALID;
11392 IRExpr* srcN = getQReg128(nn);
11393 IRExpr* srcM = getQReg128(mm);
11394 const HChar* arrNarrow = NULL;
11395 const HChar* arrWide = NULL;
11396 if (size == X00) {
11397 res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11398 srcN, srcM);
11399 arrNarrow = nameArr_Q_SZ(bitQ, size);
11400 arrWide = nameArr_Q_SZ(1, size+1);
11401 } else {
11402 /* The same thing as the X00 case, except we have to call
11403 a helper to do it. */
11404 vassert(size == X11);
11405 res = newTemp(Ity_V128);
11406 IROp slice
11407 = is2 ? Iop_V128HIto64 : Iop_V128to64;
11408 IRExpr** args
11409 = mkIRExprVec_3( IRExpr_VECRET(),
11410 unop(slice, srcN), unop(slice, srcM));
11411 IRDirty* di
11412 = unsafeIRDirty_1_N( res, 0/*regparms*/,
11413 "arm64g_dirtyhelper_PMULLQ",
11414 &arm64g_dirtyhelper_PMULLQ, args);
11415 stmt(IRStmt_Dirty(di));
11416 /* We can't use nameArr_Q_SZ for this because it can't deal with
11417 Q-sized (128 bit) results. Hence do it by hand. */
11418 arrNarrow = bitQ == 0 ? "1d" : "2d";
11419 arrWide = "1q";
11420 }
11421 putQReg128(dd, mkexpr(res));
11422 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11423 nameQReg128(dd), arrWide,
11424 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11425 return True;
11426 }
11427
11428 return False;
11429 # undef INSN
11430 }
11431
11432
11433 static
11434 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11435 {
11436 /* 31 30 29 28 23 21 20 15 10 9 4
11437 0 Q U 01110 size 1 m opcode 1 n d
11438 Decode fields: u,size,opcode
11439 */
11440 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11441 if (INSN(31,31) != 0
11442 || INSN(28,24) != BITS5(0,1,1,1,0)
11443 || INSN(21,21) != 1
11444 || INSN(10,10) != 1) {
11445 return False;
11446 }
11447 UInt bitQ = INSN(30,30);
11448 UInt bitU = INSN(29,29);
11449 UInt size = INSN(23,22);
11450 UInt mm = INSN(20,16);
11451 UInt opcode = INSN(15,11);
11452 UInt nn = INSN(9,5);
11453 UInt dd = INSN(4,0);
11454 vassert(size < 4);
11455
11456 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11457 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11458 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11459 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11460 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11461 if (size == X11) return False;
11462 Bool isADD = opcode == BITS5(0,0,0,0,0);
11463 Bool isU = bitU == 1;
11464 /* Widen both args out, do the math, narrow to final result. */
11465 IRTemp argL = newTempV128();
11466 IRTemp argLhi = IRTemp_INVALID;
11467 IRTemp argLlo = IRTemp_INVALID;
11468 IRTemp argR = newTempV128();
11469 IRTemp argRhi = IRTemp_INVALID;
11470 IRTemp argRlo = IRTemp_INVALID;
11471 IRTemp resHi = newTempV128();
11472 IRTemp resLo = newTempV128();
11473 IRTemp res = IRTemp_INVALID;
11474 assign(argL, getQReg128(nn));
11475 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11476 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11477 assign(argR, getQReg128(mm));
11478 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11479 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11480 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11481 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
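/* The add/sub is done at double width, so the carry/borrow out of the
   narrow lane is retained; shifting right by 1 (signed or unsigned as
   appropriate) then gives the halved value before narrowing back. */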
11482 assign(resHi, binop(opSxR,
11483 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11484 mkU8(1)));
11485 assign(resLo, binop(opSxR,
11486 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11487 mkU8(1)));
11488 res = math_NARROW_LANES ( resHi, resLo, size );
11489 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11490 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11491 : (isU ? "uhsub" : "shsub");
11492 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11493 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11494 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11495 return True;
11496 }
11497
11498 if (opcode == BITS5(0,0,0,1,0)) {
11499 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11500 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11501 if (bitQ == 0 && size == X11) return False; // implied 1d case
11502 Bool isU = bitU == 1;
11503 IRTemp argL = newTempV128();
11504 IRTemp argR = newTempV128();
11505 assign(argL, getQReg128(nn));
11506 assign(argR, getQReg128(mm));
11507 IRTemp res = math_RHADD(size, isU, argL, argR);
11508 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11509 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11510 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11511 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11512 return True;
11513 }
11514
11515 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11516 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11517 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11518 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11519 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11520 if (bitQ == 0 && size == X11) return False; // implied 1d case
11521 Bool isADD = opcode == BITS5(0,0,0,0,1);
11522 Bool isU = bitU == 1;
11523 IROp qop = Iop_INVALID;
11524 IROp nop = Iop_INVALID;
11525 if (isADD) {
11526 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11527 nop = mkVecADD(size);
11528 } else {
11529 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11530 nop = mkVecSUB(size);
11531 }
11532 IRTemp argL = newTempV128();
11533 IRTemp argR = newTempV128();
11534 IRTemp qres = newTempV128();
11535 IRTemp nres = newTempV128();
11536 assign(argL, getQReg128(nn));
11537 assign(argR, getQReg128(mm));
11538 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11539 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11540 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11541 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11542 putQReg128(dd, mkexpr(qres));
11543 updateQCFLAGwithDifference(qres, nres);
11544 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11545 : (isU ? "uqsub" : "sqsub");
11546 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11547 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11548 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11549 return True;
11550 }
11551
11552 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11553 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11554 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11555 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11556 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11557 Bool isORx = (size & 2) == 2;
11558 Bool invert = (size & 1) == 1;
11559 IRTemp res = newTempV128();
11560 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11561 getQReg128(nn),
11562 invert ? unop(Iop_NotV128, getQReg128(mm))
11563 : getQReg128(mm)));
11564 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11565 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11566 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11567 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11568 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11569 return True;
11570 }
11571
11572 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11573 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11574 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11575 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11576 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11577 IRTemp argD = newTempV128();
11578 IRTemp argN = newTempV128();
11579 IRTemp argM = newTempV128();
11580 assign(argD, getQReg128(dd));
11581 assign(argN, getQReg128(nn));
11582 assign(argM, getQReg128(mm));
11583 const IROp opXOR = Iop_XorV128;
11584 const IROp opAND = Iop_AndV128;
11585 const IROp opNOT = Iop_NotV128;
11586 IRTemp res = newTempV128();
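/* BSL/BIT/BIF all use the mux identity x ^ ((x ^ y) & sel): wherever a
   sel bit is 1 the result takes the bit from y, elsewhere from x.
   BSL selects with the destination, BIT with argM, BIF with ~argM. */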
11587 switch (size) {
11588 case BITS2(0,0): /* EOR */
11589 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11590 break;
11591 case BITS2(0,1): /* BSL */
11592 assign(res, binop(opXOR, mkexpr(argM),
11593 binop(opAND,
11594 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11595 mkexpr(argD))));
11596 break;
11597 case BITS2(1,0): /* BIT */
11598 assign(res, binop(opXOR, mkexpr(argD),
11599 binop(opAND,
11600 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11601 mkexpr(argM))));
11602 break;
11603 case BITS2(1,1): /* BIF */
11604 assign(res, binop(opXOR, mkexpr(argD),
11605 binop(opAND,
11606 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11607 unop(opNOT, mkexpr(argM)))));
11608 break;
11609 default:
11610 vassert(0);
11611 }
11612 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11613 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11614 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11615 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11616 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11617 return True;
11618 }
11619
11620 if (opcode == BITS5(0,0,1,1,0)) {
11621 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11622 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11623 if (bitQ == 0 && size == X11) return False; // implied 1d case
11624 Bool isGT = bitU == 0;
11625 IRExpr* argL = getQReg128(nn);
11626 IRExpr* argR = getQReg128(mm);
11627 IRTemp res = newTempV128();
11628 assign(res,
11629 isGT ? binop(mkVecCMPGTS(size), argL, argR)
11630 : binop(mkVecCMPGTU(size), argL, argR));
11631 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11632 const HChar* nm = isGT ? "cmgt" : "cmhi";
11633 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11634 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11635 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11636 return True;
11637 }
11638
11639 if (opcode == BITS5(0,0,1,1,1)) {
11640 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11641 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11642 if (bitQ == 0 && size == X11) return False; // implied 1d case
11643 Bool isGE = bitU == 0;
11644 IRExpr* argL = getQReg128(nn);
11645 IRExpr* argR = getQReg128(mm);
11646 IRTemp res = newTempV128();
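/* There is no >= primop, so compute x >= y as NOT(y > x). */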
11647 assign(res,
11648 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11649 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11650 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11651 const HChar* nm = isGE ? "cmge" : "cmhs";
11652 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11653 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11654 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11655 return True;
11656 }
11657
11658 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11659 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
11660 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11661 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
11662 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11663 if (bitQ == 0 && size == X11) return False; // implied 1d case
11664 Bool isU = bitU == 1;
11665 Bool isR = opcode == BITS5(0,1,0,1,0);
11666 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11667 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11668 IRTemp res = newTempV128();
11669 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11670 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11671 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11672 : (isU ? "ushl" : "sshl");
11673 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11674 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11675 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11676 return True;
11677 }
11678
11679 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11680 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
11681 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11682 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
11683 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11684 if (bitQ == 0 && size == X11) return False; // implied 1d case
11685 Bool isU = bitU == 1;
11686 Bool isR = opcode == BITS5(0,1,0,1,1);
11687 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11688 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11689 /* This is a bit tricky. If we're only interested in the lowest 64 bits
11690 of the result (viz, bitQ == 0), then we must adjust the operands to
11691 ensure that the upper part of the result, that we don't care about,
11692 doesn't pollute the returned Q value. To do this, zero out the upper
11693 operand halves beforehand. This works because it means, for the
11694 lanes we don't care about, we are shifting zero by zero, which can
11695 never saturate. */
11696 IRTemp res256 = newTemp(Ity_V256);
11697 IRTemp resSH = newTempV128();
11698 IRTemp resQ = newTempV128();
11699 IRTemp zero = newTempV128();
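/* As used here, the QANDxxSH primops return a 256-bit pair: the lower 128
   bits are the shifted result, and the upper 128 bits are nonzero in any
   lane that saturated; comparing that upper half against zero is what
   feeds the QC flag update below. */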
11700 assign(res256, binop(op,
11701 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11702 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11703 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11704 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11705 assign(zero, mkV128(0x0000));
11706 putQReg128(dd, mkexpr(resSH));
11707 updateQCFLAGwithDifference(resQ, zero);
11708 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11709 : (isU ? "uqshl" : "sqshl");
11710 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11711 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11712 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11713 return True;
11714 }
11715
11716 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11717 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11718 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11719 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11720 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11721 if (bitQ == 0 && size == X11) return False; // implied 1d case
11722 Bool isU = bitU == 1;
11723 Bool isMAX = (opcode & 1) == 0;
11724 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11725 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11726 IRTemp t = newTempV128();
11727 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11728 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11729 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
11730 : (isU ? "umin" : "smin");
11731 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11732 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11733 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11734 return True;
11735 }
11736
11737 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
11738 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
11739 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
11740 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
11741 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
11742 if (size == X11) return False; // 1d/2d cases not allowed
11743 Bool isU = bitU == 1;
11744 Bool isACC = opcode == BITS5(0,1,1,1,1);
11745 vassert(size <= 2);
11746 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
11747 IRTemp t2 = newTempV128();
11748 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
11749 : mkexpr(t1));
11750 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11751 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
11752 : (isU ? "uabd" : "sabd");
11753 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11754 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11755 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11756 return True;
11757 }
11758
11759 if (opcode == BITS5(1,0,0,0,0)) {
11760 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
11761 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
11762 if (bitQ == 0 && size == X11) return False; // implied 1d case
11763 Bool isSUB = bitU == 1;
11764 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
11765 IRTemp t = newTempV128();
11766 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11767 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11768 const HChar* nm = isSUB ? "sub" : "add";
11769 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11770 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11771 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11772 return True;
11773 }
11774
11775 if (opcode == BITS5(1,0,0,0,1)) {
11776 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
11777 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
11778 if (bitQ == 0 && size == X11) return False; // implied 1d case
11779 Bool isEQ = bitU == 1;
11780 IRExpr* argL = getQReg128(nn);
11781 IRExpr* argR = getQReg128(mm);
11782 IRTemp res = newTempV128();
11783 assign(res,
11784 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
11785 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
11786 binop(Iop_AndV128, argL, argR),
11787 mkV128(0x0000))));
11788 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11789 const HChar* nm = isEQ ? "cmeq" : "cmtst";
11790 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11791 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11792 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11793 return True;
11794 }
11795
11796 if (opcode == BITS5(1,0,0,1,0)) {
11797 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
11798 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
11799 if (bitQ == 0 && size == X11) return False; // implied 1d case
11800 Bool isMLS = bitU == 1;
11801 IROp opMUL = mkVecMUL(size);
11802 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
11803 IRTemp res = newTempV128();
11804 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
11805 assign(res, binop(opADDSUB,
11806 getQReg128(dd),
11807 binop(opMUL, getQReg128(nn), getQReg128(mm))));
11808 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11809 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11810 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
11811 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11812 return True;
11813 }
11814 return False;
11815 }
11816
11817 if (opcode == BITS5(1,0,0,1,1)) {
11818 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
11819 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
11820 if (bitQ == 0 && size == X11) return False; // implied 1d case
11821 Bool isPMUL = bitU == 1;
11822 const IROp opsPMUL[4]
11823 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
11824 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
11825 IRTemp res = newTempV128();
11826 if (opMUL != Iop_INVALID) {
11827 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
11828 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11829 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11830 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
11831 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11832 return True;
11833 }
11834 return False;
11835 }
11836
11837 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
11838 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
11839 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
11840 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
11841 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
11842 if (size == X11) return False;
11843 Bool isU = bitU == 1;
11844 Bool isMAX = opcode == BITS5(1,0,1,0,0);
11845 IRTemp vN = newTempV128();
11846 IRTemp vM = newTempV128();
11847 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11848 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11849 assign(vN, getQReg128(nn));
11850 assign(vM, getQReg128(mm));
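/* Pairwise trick: concatenating the even lanes of {M,N} and the odd lanes
   of {M,N} lines each adjacent pair of elements up in the same lane of
   the two operands, so a single vertical max/min yields all the pairwise
   results at once. */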
11851 IRTemp res128 = newTempV128();
11852 assign(res128,
11853 binop(op,
11854 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11855 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11856 /* In the half-width case, use CatEL32x4 to extract the half-width
11857 result from the full-width result. */
11858 IRExpr* res
11859 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11860 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11861 mkexpr(res128)))
11862 : mkexpr(res128);
11863 putQReg128(dd, res);
11864 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11865 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
11866 : (isU ? "uminp" : "sminp");
11867 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11868 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11869 return True;
11870 }
11871
11872 if (opcode == BITS5(1,0,1,1,0)) {
11873 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
11874 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
11875 if (size == X00 || size == X11) return False;
11876 Bool isR = bitU == 1;
11877 IRTemp res, sat1q, sat1n, vN, vM;
11878 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11879 newTempsV128_2(&vN, &vM);
11880 assign(vN, getQReg128(nn));
11881 assign(vM, getQReg128(mm));
11882 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11883 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11884 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
11885 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11886 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11887 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11888 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11889 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11890 return True;
11891 }
11892
11893 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
11894 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
11895 if (bitQ == 0 && size == X11) return False; // implied 1d case
11896 IRTemp vN = newTempV128();
11897 IRTemp vM = newTempV128();
11898 assign(vN, getQReg128(nn));
11899 assign(vM, getQReg128(mm));
11900 IRTemp res128 = newTempV128();
11901 assign(res128,
11902 binop(mkVecADD(size),
11903 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11904 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11905 /* In the half-width case, use CatEL32x4 to extract the half-width
11906 result from the full-width result. */
11907 IRExpr* res
11908 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11909 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11910 mkexpr(res128)))
11911 : mkexpr(res128);
11912 putQReg128(dd, res);
11913 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11914 DIP("addp %s.%s, %s.%s, %s.%s\n",
11915 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11916 return True;
11917 }
11918
11919 if (bitU == 0
11920 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11921 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11922 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11923 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11924 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11925 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11926 Bool isD = (size & 1) == 1;
11927 if (bitQ == 0 && isD) return False; // implied 1d case
11928 Bool isMIN = (size & 2) == 2;
11929 Bool isNM = opcode == BITS5(1,1,0,0,0);
11930 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
11931 IRTemp res = newTempV128();
11932 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
11933 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11934 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11935 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
11936 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11937 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11938 return True;
11939 }
11940
11941 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
11942 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11943 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11944 Bool isD = (size & 1) == 1;
11945 Bool isSUB = (size & 2) == 2;
11946 if (bitQ == 0 && isD) return False; // implied 1d case
11947 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11948 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11949 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11950 IRTemp rm = mk_get_IR_rounding_mode();
11951 IRTemp t1 = newTempV128();
11952 IRTemp t2 = newTempV128();
11953 // FIXME: double rounding; use FMA primops instead
11954 assign(t1, triop(opMUL,
11955 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11956 assign(t2, triop(isSUB ? opSUB : opADD,
11957 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11958 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11959 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11960 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
11961 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11962 return True;
11963 }
11964
11965 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
11966 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11967 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11968 Bool isD = (size & 1) == 1;
11969 Bool isSUB = (size & 2) == 2;
11970 if (bitQ == 0 && isD) return False; // implied 1d case
11971 const IROp ops[4]
11972 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
11973 IROp op = ops[size];
11974 IRTemp rm = mk_get_IR_rounding_mode();
11975 IRTemp t1 = newTempV128();
11976 IRTemp t2 = newTempV128();
11977 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11978 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11979 putQReg128(dd, mkexpr(t2));
11980 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11981 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
11982 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11983 return True;
11984 }
11985
11986 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
11987 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11988 Bool isD = (size & 1) == 1;
11989 if (bitQ == 0 && isD) return False; // implied 1d case
11990 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11991 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
11992 IRTemp rm = mk_get_IR_rounding_mode();
11993 IRTemp t1 = newTempV128();
11994 IRTemp t2 = newTempV128();
11995 // FIXME: use Abd primop instead?
11996 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11997 assign(t2, unop(opABS, mkexpr(t1)));
11998 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11999 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12000 DIP("fabd %s.%s, %s.%s, %s.%s\n",
12001 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12002 return True;
12003 }
12004
12005 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
12006 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12007 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12008 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12009 Bool isD = (size & 1) == 1;
12010 Bool isMULX = bitU == 0;
12011 if (bitQ == 0 && isD) return False; // implied 1d case
12012 IRTemp rm = mk_get_IR_rounding_mode();
12013 IRTemp t1 = newTempV128();
12014 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12015 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12016 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12017 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12018 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
12019 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12020 return True;
12021 }
12022
12023 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
12024 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12025 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12026 Bool isD = (size & 1) == 1;
12027 if (bitQ == 0 && isD) return False; // implied 1d case
12028 Bool isGE = bitU == 1;
12029 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
12030 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
12031 IRTemp t1 = newTempV128();
12032 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
12033 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
12034 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12035 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12036 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
12037 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12038 return True;
12039 }
12040
12041 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
12042 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12043 Bool isD = (size & 1) == 1;
12044 if (bitQ == 0 && isD) return False; // implied 1d case
12045 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12046 IRTemp t1 = newTempV128();
12047 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
12048 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12049 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12050 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
12051 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12052 return True;
12053 }
12054
12055 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
12056 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12057 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12058 Bool isD = (size & 1) == 1;
12059 Bool isGT = (size & 2) == 2;
12060 if (bitQ == 0 && isD) return False; // implied 1d case
12061 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
12062 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
12063 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
12064 IRTemp t1 = newTempV128();
12065 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
12066 unop(opABS, getQReg128(nn)))); // swapd
12067 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
12068 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12069 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
12070 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12071 return True;
12072 }
12073
12074 if (bitU == 1
12075 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
12076 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12077 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12078 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12079 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12080 /* FMAXNMP, FMINNMP: FIXME -- KLUDGED */
12081 Bool isD = (size & 1) == 1;
12082 if (bitQ == 0 && isD) return False; // implied 1d case
12083 Bool isMIN = (size & 2) == 2;
12084 Bool isNM = opcode == BITS5(1,1,0,0,0);
12085 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
12086 IRTemp srcN = newTempV128();
12087 IRTemp srcM = newTempV128();
12088 IRTemp preL = IRTemp_INVALID;
12089 IRTemp preR = IRTemp_INVALID;
12090 assign(srcN, getQReg128(nn));
12091 assign(srcM, getQReg128(mm));
12092 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12093 srcM, srcN, isD, bitQ);
12094 putQReg128(
12095 dd, math_MAYBE_ZERO_HI64_fromE(
12096 bitQ,
12097 binop(opMXX, mkexpr(preL), mkexpr(preR))));
12098 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12099 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
12100 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
12101 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12102 return True;
12103 }
12104
12105 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
12106 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12107 Bool isD = size == X01;
12108 if (bitQ == 0 && isD) return False; // implied 1d case
12109 IRTemp srcN = newTempV128();
12110 IRTemp srcM = newTempV128();
12111 IRTemp preL = IRTemp_INVALID;
12112 IRTemp preR = IRTemp_INVALID;
12113 assign(srcN, getQReg128(nn));
12114 assign(srcM, getQReg128(mm));
12115 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
12116 srcM, srcN, isD, bitQ);
12117 putQReg128(
12118 dd, math_MAYBE_ZERO_HI64_fromE(
12119 bitQ,
12120 triop(mkVecADDF(isD ? 3 : 2),
12121 mkexpr(mk_get_IR_rounding_mode()),
12122 mkexpr(preL), mkexpr(preR))));
12123 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12124 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
12125 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12126 return True;
12127 }
12128
12129 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
12130 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12131 Bool isD = (size & 1) == 1;
12132 if (bitQ == 0 && isD) return False; // implied 1d case
12133 vassert(size <= 1);
12134 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
12135 IROp op = ops[size];
12136 IRTemp rm = mk_get_IR_rounding_mode();
12137 IRTemp t1 = newTempV128();
12138 IRTemp t2 = newTempV128();
12139 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
12140 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
12141 putQReg128(dd, mkexpr(t2));
12142 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12143 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
12144 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12145 return True;
12146 }
12147
12148 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
12149 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12150 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
12151 Bool isSQRT = (size & 2) == 2;
12152 Bool isD = (size & 1) == 1;
12153 if (bitQ == 0 && isD) return False; // implied 1d case
12154 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
12155 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
12156 IRTemp res = newTempV128();
12157 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
12158 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12159 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12160 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
12161 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
12162 return True;
12163 }
12164
12165 return False;
12166 # undef INSN
12167 }
12168
12169
12170 static
12171 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
12172 {
12173 /* 31 30 29 28 23 21 16 11 9 4
12174 0 Q U 01110 size 10000 opcode 10 n d
12175 Decode fields: U,size,opcode
12176 */
12177 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12178 if (INSN(31,31) != 0
12179 || INSN(28,24) != BITS5(0,1,1,1,0)
12180 || INSN(21,17) != BITS5(1,0,0,0,0)
12181 || INSN(11,10) != BITS2(1,0)) {
12182 return False;
12183 }
12184 UInt bitQ = INSN(30,30);
12185 UInt bitU = INSN(29,29);
12186 UInt size = INSN(23,22);
12187 UInt opcode = INSN(16,12);
12188 UInt nn = INSN(9,5);
12189 UInt dd = INSN(4,0);
12190 vassert(size < 4);
12191
12192 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
12193 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
12194 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
12195 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
12196 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
12197 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
12198 vassert(size <= 2);
12199 IRTemp res = newTempV128();
12200 assign(res, unop(iops[size], getQReg128(nn)));
12201 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12202 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12203 DIP("%s %s.%s, %s.%s\n", "rev64",
12204 nameQReg128(dd), arr, nameQReg128(nn), arr);
12205 return True;
12206 }
12207
12208 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
12209 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
12210 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
12211 Bool isH = size == X01;
12212 IRTemp res = newTempV128();
12213 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
12214 assign(res, unop(iop, getQReg128(nn)));
12215 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12216 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12217 DIP("%s %s.%s, %s.%s\n", "rev32",
12218 nameQReg128(dd), arr, nameQReg128(nn), arr);
12219 return True;
12220 }
12221
12222 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
12223 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
12224 IRTemp res = newTempV128();
12225 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
12226 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12227 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12228 DIP("%s %s.%s, %s.%s\n", "rev16",
12229 nameQReg128(dd), arr, nameQReg128(nn), arr);
12230 return True;
12231 }
12232
12233 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
12234 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
12235 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
12236 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
12237 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
12238 /* Widens, and size refers to the narrow size. */
12239 if (size == X11) return False; // no 1d or 2d cases
12240 Bool isU = bitU == 1;
12241 Bool isACC = opcode == BITS5(0,0,1,1,0);
12242 IRTemp src = newTempV128();
12243 IRTemp sum = newTempV128();
12244 IRTemp res = newTempV128();
12245 assign(src, getQReg128(nn));
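/* Widen the odd-numbered and even-numbered lanes separately to double
   width and add them: that is exactly the pairwise sum of adjacent
   narrow lanes. */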
12246 assign(sum,
12247 binop(mkVecADD(size+1),
12248 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12249 isU, True/*fromOdd*/, size, mkexpr(src))),
12250 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
12251 isU, False/*!fromOdd*/, size, mkexpr(src)))));
12252 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
12253 : mkexpr(sum));
12254 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12255 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12256 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
12257 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
12258 : (isU ? "uaddlp" : "saddlp"),
12259 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12260 return True;
12261 }
12262
12263 if (opcode == BITS5(0,0,0,1,1)) {
12264 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
12265 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
12266 if (bitQ == 0 && size == X11) return False; // implied 1d case
12267 Bool isUSQADD = bitU == 1;
12268 /* This is switched (in the US vs SU sense) deliberately.
12269 SUQADD corresponds to the ExtUSsatSS variants and
12270 USQADD corresponds to the ExtSUsatUU variants.
12271 See libvex_ir for more details. */
12272 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
12273 : mkVecQADDEXTUSSATSS(size);
12274 IROp nop = mkVecADD(size);
12275 IRTemp argL = newTempV128();
12276 IRTemp argR = newTempV128();
12277 IRTemp qres = newTempV128();
12278 IRTemp nres = newTempV128();
12279 /* Because the two arguments to the addition are implicitly
12280 extended differently (one signedly, the other unsignedly) it is
12281 important to present them to the primop in the correct order. */
12282 assign(argL, getQReg128(nn));
12283 assign(argR, getQReg128(dd));
12284 assign(qres, math_MAYBE_ZERO_HI64_fromE(
12285 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
12286 assign(nres, math_MAYBE_ZERO_HI64_fromE(
12287 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
12288 putQReg128(dd, mkexpr(qres));
12289 updateQCFLAGwithDifference(qres, nres);
12290 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12291 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
12292 nameQReg128(dd), arr, nameQReg128(nn), arr);
12293 return True;
12294 }
12295
12296 if (opcode == BITS5(0,0,1,0,0)) {
12297 /* -------- 0,xx,00100: CLS std6_std6 -------- */
12298 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
12299 if (size == X11) return False; // no 1d or 2d cases
12300 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
12301 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
12302 Bool isCLZ = bitU == 1;
12303 IRTemp res = newTempV128();
12304 vassert(size <= 2);
12305 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
12306 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12307 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12308 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
12309 nameQReg128(dd), arr, nameQReg128(nn), arr);
12310 return True;
12311 }
12312
12313 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
12314 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
12315 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
12316 IRTemp res = newTempV128();
12317 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
12318 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12319 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12320 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
12321 nameQReg128(dd), arr, nameQReg128(nn), arr);
12322 return True;
12323 }
12324
12325 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
12326 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
12327 IRTemp res = newTempV128();
12328 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
12329 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12330 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
12331 DIP("%s %s.%s, %s.%s\n", "rbit",
12332 nameQReg128(dd), arr, nameQReg128(nn), arr);
12333 return True;
12334 }
12335
12336 if (opcode == BITS5(0,0,1,1,1)) {
12337 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
12338 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
12339 if (bitQ == 0 && size == X11) return False; // implied 1d case
12340 Bool isNEG = bitU == 1;
12341 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
12342 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
12343 getQReg128(nn), size );
12344 IRTemp qres = newTempV128(), nres = newTempV128();
12345 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
12346 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
12347 putQReg128(dd, mkexpr(qres));
12348 updateQCFLAGwithDifference(qres, nres);
12349 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12350 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
12351 nameQReg128(dd), arr, nameQReg128(nn), arr);
12352 return True;
12353 }
12354
12355 if (opcode == BITS5(0,1,0,0,0)) {
12356 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
12357 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
12358 if (bitQ == 0 && size == X11) return False; // implied 1d case
12359 Bool isGT = bitU == 0;
12360 IRExpr* argL = getQReg128(nn);
12361 IRExpr* argR = mkV128(0x0000);
12362 IRTemp res = newTempV128();
12363 IROp opGTS = mkVecCMPGTS(size);
12364 assign(res, isGT ? binop(opGTS, argL, argR)
12365 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
12366 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12367 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12368 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
12369 nameQReg128(dd), arr, nameQReg128(nn), arr);
12370 return True;
12371 }
12372
12373 if (opcode == BITS5(0,1,0,0,1)) {
12374 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
12375 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
12376 if (bitQ == 0 && size == X11) return False; // implied 1d case
12377 Bool isEQ = bitU == 0;
12378 IRExpr* argL = getQReg128(nn);
12379 IRExpr* argR = mkV128(0x0000);
12380 IRTemp res = newTempV128();
12381 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12382 : unop(Iop_NotV128,
12383 binop(mkVecCMPGTS(size), argL, argR)));
12384 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12385 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12386 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12387 nameQReg128(dd), arr, nameQReg128(nn), arr);
12388 return True;
12389 }
12390
12391 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12392 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12393 if (bitQ == 0 && size == X11) return False; // implied 1d case
12394 IRExpr* argL = getQReg128(nn);
12395 IRExpr* argR = mkV128(0x0000);
12396 IRTemp res = newTempV128();
12397 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12398 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12399 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12400 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12401 nameQReg128(dd), arr, nameQReg128(nn), arr);
12402 return True;
12403 }
12404
12405 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12406 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12407 if (bitQ == 0 && size == X11) return False; // implied 1d case
12408 IRTemp res = newTempV128();
12409 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12410 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12411 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12412 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12413 return True;
12414 }
12415
12416 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12417 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12418 if (bitQ == 0 && size == X11) return False; // implied 1d case
12419 IRTemp res = newTempV128();
12420 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12421 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12422 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12423 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12424 return True;
12425 }
12426
12427 UInt ix = 0; /*INVALID*/
12428 if (size >= X10) {
12429 switch (opcode) {
12430 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12431 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12432 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12433 default: break;
12434 }
12435 }
12436 if (ix > 0) {
12437 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12438 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12439 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12440 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12441 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12442 if (bitQ == 0 && size == X11) return False; // implied 1d case
12443 Bool isD = size == X11;
12444 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12445 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12446 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12447 IROp opCmp = Iop_INVALID;
12448 Bool swap = False;
12449 const HChar* nm = "??";
12450 switch (ix) {
12451 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12452 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12453 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12454 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12455 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12456 default: vassert(0);
12457 }
12458 IRExpr* zero = mkV128(0x0000);
12459 IRTemp res = newTempV128();
12460 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12461 : binop(opCmp, getQReg128(nn), zero));
12462 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12463 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12464 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12465 nameQReg128(dd), arr, nameQReg128(nn), arr);
12466 return True;
12467 }
12468
12469 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12470 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12471 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12472 if (bitQ == 0 && size == X11) return False; // implied 1d case
12473 Bool isFNEG = bitU == 1;
12474 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12475 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12476 IRTemp res = newTempV128();
12477 assign(res, unop(op, getQReg128(nn)));
12478 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12479 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12480 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12481 nameQReg128(dd), arr, nameQReg128(nn), arr);
12482 return True;
12483 }
12484
12485 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12486 /* -------- 0,xx,10010: XTN{,2} -------- */
12487 if (size == X11) return False;
12488 vassert(size < 3);
12489 Bool is2 = bitQ == 1;
12490 IROp opN = mkVecNARROWUN(size);
12491 IRTemp resN = newTempV128();
12492 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
12493 putLO64andZUorPutHI64(is2, dd, resN);
12494 const HChar* nm = "xtn";
12495 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12496 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12497 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12498 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12499 return True;
12500 }
12501
12502 if (opcode == BITS5(1,0,1,0,0)
12503 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12504 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12505 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12506 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12507 if (size == X11) return False;
12508 vassert(size < 3);
12509 Bool is2 = bitQ == 1;
12510 IROp opN = Iop_INVALID;
12511 Bool zWiden = True;
12512 const HChar* nm = "??";
12513 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12514 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12515 }
12516 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12517 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12518 }
12519 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12520 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12521 }
12522 else vassert(0);
12523 IRTemp src = newTempV128();
12524 assign(src, getQReg128(nn));
12525 IRTemp resN = newTempV128();
12526 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12527 putLO64andZUorPutHI64(is2, dd, resN);
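/* To detect saturation, re-widen the narrowed result and compare it with
   the original source: any difference means at least one lane saturated,
   and that drives the QC flag update. */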
12528 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12529 size, mkexpr(resN));
12530 updateQCFLAGwithDifference(src, resW);
12531 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12532 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12533 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12534 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12535 return True;
12536 }
12537
12538 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12539 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12540 /* Widens, and size is the narrow size. */
12541 if (size == X11) return False;
12542 Bool is2 = bitQ == 1;
12543 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12544 IROp opSHL = mkVecSHLN(size+1);
12545 IRTemp src = newTempV128();
12546 IRTemp res = newTempV128();
12547 assign(src, getQReg128(nn));
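/* Interleaving the source with itself puts a copy of each narrow element
   in both halves of its wide lane; shifting left by the narrow lane width
   then discards the upper copy and leaves element << lane-width, which is
   the SHLL result. */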
12548 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12549 mkU8(8 << size)));
12550 putQReg128(dd, mkexpr(res));
12551 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12552 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12553 DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
12554 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12555 return True;
12556 }
12557
12558 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12559 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
12560 UInt nLanes = size == X00 ? 4 : 2;
12561 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12562 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12563 IRTemp rm = mk_get_IR_rounding_mode();
12564 IRTemp src[nLanes];
12565 for (UInt i = 0; i < nLanes; i++) {
12566 src[i] = newTemp(srcTy);
12567 assign(src[i], getQRegLane(nn, i, srcTy));
12568 }
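/* bitQ selects where the narrowed results go: the low half for FCVTN
   (with the high half zeroed below), or the high half for FCVTN2,
   leaving the low half untouched. */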
12569 for (UInt i = 0; i < nLanes; i++) {
12570 putQRegLane(dd, nLanes * bitQ + i,
12571 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12572 }
12573 if (bitQ == 0) {
12574 putQRegLane(dd, 1, mkU64(0));
12575 }
12576 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12577 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12578 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12579 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12580 return True;
12581 }
12582
12583 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12584 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12585 /* Using Irrm_NEAREST here isn't right.  The insn specifies "round to
12586 odd": truncate, then set the result's lsb if any discarded bits were
nonzero -- that avoids double rounding when the value is narrowed again
later.  There is no IR rounding mode for it, so this is KLUDGEd as
round-to-nearest. */
12587 IRType srcTy = Ity_F64;
12588 IROp opCvt = Iop_F64toF32;
12589 IRTemp src[2];
12590 for (UInt i = 0; i < 2; i++) {
12591 src[i] = newTemp(srcTy);
12592 assign(src[i], getQRegLane(nn, i, srcTy));
12593 }
12594 for (UInt i = 0; i < 2; i++) {
12595 putQRegLane(dd, 2 * bitQ + i,
12596 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12597 }
12598 if (bitQ == 0) {
12599 putQRegLane(dd, 1, mkU64(0));
12600 }
12601 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12602 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12603 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12604 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12605 return True;
12606 }
12607
12608 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12609 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12610 UInt nLanes = size == X00 ? 4 : 2;
12611 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
12612 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
12613 IRTemp src[nLanes];
12614 for (UInt i = 0; i < nLanes; i++) {
12615 src[i] = newTemp(srcTy);
12616 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12617 }
12618 for (UInt i = 0; i < nLanes; i++) {
12619 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12620 }
12621 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12622 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12623 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12624 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12625 return True;
12626 }
12627
12628 ix = 0;
12629 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12630 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12631 // = 1 + bitU[0]:size[1]:opcode[0]
12632 vassert(ix >= 1 && ix <= 8);
12633 if (ix == 7) ix = 0;
12634 }
12635 if (ix > 0) {
12636 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12637 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12638 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12639 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12640 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12641 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12642 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12643 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12644 /* rm plan:
12645 FRINTN: tieeven -- !! FIXME KLUDGED !!
12646 FRINTM: -inf
12647 FRINTP: +inf
12648 FRINTZ: zero
12649 FRINTA: tieaway -- !! FIXME KLUDGED !!
12650 FRINTX: per FPCR + "exact = TRUE"
12651 FRINTI: per FPCR
12652 */
12653 Bool isD = (size & 1) == 1;
12654 if (bitQ == 0 && isD) return False; // implied 1d case
12655
12656 IRTemp irrmRM = mk_get_IR_rounding_mode();
12657
12658 UChar ch = '?';
12659 IRTemp irrm = newTemp(Ity_I32);
12660 switch (ix) {
12661 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12662 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12663 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12664 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12665 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12666 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12667 // FRINTX ("round to integral exact") rounds per the FPCR, like FRINTI,
12668 // but additionally signals the Inexact exception if the result differs
// from the operand.  FP exceptions aren't modelled here, so treating it
// like FRINTI is the best we can do.
12669 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12670 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12671 default: vassert(0);
12672 }
12673
12674 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12675 if (isD) {
12676 for (UInt i = 0; i < 2; i++) {
12677 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12678 getQRegLane(nn, i, Ity_F64)));
12679 }
12680 } else {
12681 UInt n = bitQ==1 ? 4 : 2;
12682 for (UInt i = 0; i < n; i++) {
12683 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12684 getQRegLane(nn, i, Ity_F32)));
12685 }
12686 if (bitQ == 0)
12687 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12688 }
12689 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12690 DIP("frint%c %s.%s, %s.%s\n", ch,
12691 nameQReg128(dd), arr, nameQReg128(nn), arr);
12692 return True;
12693 }
12694
12695 ix = 0; /*INVALID*/
12696 switch (opcode) {
12697 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12698 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12699 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12700 default: break;
12701 }
12702 if (ix > 0) {
12703 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12704 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12705 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12706 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12707 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12708 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12709 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12710 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12711 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12712 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12713 Bool isD = (size & 1) == 1;
12714 if (bitQ == 0 && isD) return False; // implied 1d case
12715
12716 IRRoundingMode irrm = 8; /*impossible*/
12717 HChar ch = '?';
12718 switch (ix) {
12719 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12720 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12721 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12722 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12723 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12724 default: vassert(0);
12725 }
12726 IROp cvt = Iop_INVALID;
12727 if (bitU == 1) {
12728 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12729 } else {
12730 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12731 }
12732 if (isD) {
12733 for (UInt i = 0; i < 2; i++) {
12734 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12735 getQRegLane(nn, i, Ity_F64)));
12736 }
12737 } else {
12738 UInt n = bitQ==1 ? 4 : 2;
12739 for (UInt i = 0; i < n; i++) {
12740 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12741 getQRegLane(nn, i, Ity_F32)));
12742 }
12743 if (bitQ == 0)
12744 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12745 }
12746 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12747 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12748 nameQReg128(dd), arr, nameQReg128(nn), arr);
12749 return True;
12750 }
12751
12752 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12753 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12754 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12755 Bool isREC = bitU == 0;
12756 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12757 IRTemp res = newTempV128();
12758 assign(res, unop(op, getQReg128(nn)));
12759 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12760 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12761 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12762 DIP("%s %s.%s, %s.%s\n", nm,
12763 nameQReg128(dd), arr, nameQReg128(nn), arr);
12764 return True;
12765 }
12766
12767 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12768 /* -------- 0,0x,11101: SCVTF -------- */
12769 /* -------- 1,0x,11101: UCVTF -------- */
12770 /* 31 28 22 21 15 9 4
12771 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12772 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12773 with laneage:
12774 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12775 */
12776 Bool isQ = bitQ == 1;
12777 Bool isU = bitU == 1;
12778 Bool isF64 = (size & 1) == 1;
12779 if (isQ || !isF64) {
12780 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12781 UInt nLanes = 0;
12782 Bool zeroHI = False;
12783 const HChar* arrSpec = NULL;
12784 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12785 isQ, isF64 );
12786 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12787 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12788 IRTemp rm = mk_get_IR_rounding_mode();
12789 UInt i;
12790 vassert(ok); /* the 'if' above should ensure this */
12791 for (i = 0; i < nLanes; i++) {
12792 putQRegLane(dd, i,
12793 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12794 }
12795 if (zeroHI) {
12796 putQRegLane(dd, 1, mkU64(0));
12797 }
12798 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12799 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12800 return True;
12801 }
12802 /* else fall through */
12803 }
12804
12805 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12806 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12807 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12808 Bool isSQRT = bitU == 1;
12809 Bool isD = (size & 1) == 1;
12810 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12811 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12812 if (bitQ == 0 && isD) return False; // implied 1d case
12813 IRTemp resV = newTempV128();
12814 assign(resV, unop(op, getQReg128(nn)));
12815 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12816 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12817 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12818 nameQReg128(dd), arr, nameQReg128(nn), arr);
12819 return True;
12820 }
12821
12822 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12823 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12824 Bool isD = (size & 1) == 1;
12825 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12826 if (bitQ == 0 && isD) return False; // implied 1d case
12827 IRTemp resV = newTempV128();
12828 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12829 getQReg128(nn)));
12830 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12831 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12832 DIP("%s %s.%s, %s.%s\n", "fsqrt",
12833 nameQReg128(dd), arr, nameQReg128(nn), arr);
12834 return True;
12835 }
12836
12837 return False;
12838 # undef INSN
12839 }
12840
12841
12842 static
12843 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12844 {
12845 /* 31 28 23 21 20 19 15 11 9 4
12846 0 Q U 01111 size L M m opcode H 0 n d
12847 Decode fields are: u,size,opcode
12848 M is really part of the mm register number. Individual
12849 cases need to inspect L and H though.
12850 */
12851 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12852 if (INSN(31,31) != 0
12853 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
12854 return False;
12855 }
12856 UInt bitQ = INSN(30,30);
12857 UInt bitU = INSN(29,29);
12858 UInt size = INSN(23,22);
12859 UInt bitL = INSN(21,21);
12860 UInt bitM = INSN(20,20);
12861 UInt mmLO4 = INSN(19,16);
12862 UInt opcode = INSN(15,12);
12863 UInt bitH = INSN(11,11);
12864 UInt nn = INSN(9,5);
12865 UInt dd = INSN(4,0);
12866 vassert(size < 4);
12867 vassert(bitH < 2 && bitM < 2 && bitL < 2);
12868
12869 if (bitU == 0 && size >= X10
12870 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12871 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12872 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12873 if (bitQ == 0 && size == X11) return False; // implied 1d case
12874 Bool isD = (size & 1) == 1;
12875 Bool isSUB = opcode == BITS4(0,1,0,1);
12876 UInt index;
12877 if (!isD) index = (bitH << 1) | bitL;
12878 else if (isD && bitL == 0) index = bitH;
12879 else return False; // sz:L == x11 => unallocated encoding
12880 vassert(index < (isD ? 2 : 4));
12881 IRType ity = isD ? Ity_F64 : Ity_F32;
12882 IRTemp elem = newTemp(ity);
12883 UInt mm = (bitM << 4) | mmLO4;
12884 assign(elem, getQRegLane(mm, index, ity));
12885 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12886 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12887 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12888 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12889 IRTemp rm = mk_get_IR_rounding_mode();
12890 IRTemp t1 = newTempV128();
12891 IRTemp t2 = newTempV128();
12892 // FIXME: double rounding; use FMA primops instead
12893 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12894 assign(t2, triop(isSUB ? opSUB : opADD,
12895 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12896 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12897 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12898 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12899 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12900 isD ? 'd' : 's', index);
12901 return True;
12902 }
12903
12904 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12905 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12906 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12907 if (bitQ == 0 && size == X11) return False; // implied 1d case
12908 Bool isD = (size & 1) == 1;
12909 Bool isMULX = bitU == 1;
12910 UInt index;
12911 if (!isD) index = (bitH << 1) | bitL;
12912 else if (isD && bitL == 0) index = bitH;
12913 else return False; // sz:L == x11 => unallocated encoding
12914 vassert(index < (isD ? 2 : 4));
12915 IRType ity = isD ? Ity_F64 : Ity_F32;
12916 IRTemp elem = newTemp(ity);
12917 UInt mm = (bitM << 4) | mmLO4;
12918 assign(elem, getQRegLane(mm, index, ity));
12919 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12920 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12921 IRTemp res = newTempV128();
12922 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12923 mkexpr(mk_get_IR_rounding_mode()),
12924 getQReg128(nn), mkexpr(dupd)));
12925 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12926 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12927 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12928 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12929 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12930 return True;
12931 }
12932
12933 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12934 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12935 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12936 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12937 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12938 Bool isMLA = opcode == BITS4(0,0,0,0);
12939 Bool isMLS = opcode == BITS4(0,1,0,0);
12940 UInt mm = 32; // invalid
12941 UInt ix = 16; // invalid
12942 switch (size) {
12943 case X00:
12944 return False; // b case is not allowed
12945 case X01:
12946 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12947 case X10:
12948 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12949 case X11:
12950 return False; // d case is not allowed
12951 default:
12952 vassert(0);
12953 }
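/* Element index and register-number split, per the encoding: for
   16-bit lanes the index is H:L:M (0..7) and the element register is
   limited to V0..V15 (Rm only), whereas for 32-bit lanes the index is
   H:L (0..3) and M supplies bit 4 of the register number, allowing
   V0..V31. */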
12954 vassert(mm < 32 && ix < 16);
12955 IROp opMUL = mkVecMUL(size);
12956 IROp opADD = mkVecADD(size);
12957 IROp opSUB = mkVecSUB(size);
12958 HChar ch = size == X01 ? 'h' : 's';
12959 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12960 IRTemp vecD = newTempV128();
12961 IRTemp vecN = newTempV128();
12962 IRTemp res = newTempV128();
12963 assign(vecD, getQReg128(dd));
12964 assign(vecN, getQReg128(nn));
12965 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12966 if (isMLA || isMLS) {
12967 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12968 } else {
12969 assign(res, prod);
12970 }
12971 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12972 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12973 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12974 : (isMLS ? "mls" : "mul"),
12975 nameQReg128(dd), arr,
12976 nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
12977 return True;
12978 }
12979
12980 if (opcode == BITS4(1,0,1,0)
12981 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12982 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12983 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12984 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12985 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12986 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12987 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12988 /* Widens, and size refers to the narrowed lanes. */
12989 UInt ks = 3;
12990 switch (opcode) {
12991 case BITS4(1,0,1,0): ks = 0; break;
12992 case BITS4(0,0,1,0): ks = 1; break;
12993 case BITS4(0,1,1,0): ks = 2; break;
12994 default: vassert(0);
12995 }
12996 vassert(ks >= 0 && ks <= 2);
12997 Bool isU = bitU == 1;
12998 Bool is2 = bitQ == 1;
12999 UInt mm = 32; // invalid
13000 UInt ix = 16; // invalid
13001 switch (size) {
13002 case X00:
13003 return False; // h_b_b[] case is not allowed
13004 case X01:
13005 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13006 case X10:
13007 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13008 case X11:
13009 return False; // q_d_d[] case is not allowed
13010 default:
13011 vassert(0);
13012 }
13013 vassert(mm < 32 && ix < 16);
13014 IRTemp vecN = newTempV128();
13015 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13016 IRTemp vecD = newTempV128();
13017 assign(vecN, getQReg128(nn));
13018 assign(vecD, getQReg128(dd));
13019 IRTemp res = IRTemp_INVALID;
13020 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
13021 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13022 putQReg128(dd, mkexpr(res));
13023 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
13024 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13025 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13026 HChar ch = size == X01 ? 'h' : 's';
13027 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13028 isU ? 'u' : 's', nm, is2 ? "2" : "",
13029 nameQReg128(dd), arrWide,
13030 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
13031 return True;
13032 }
13033
13034 if (bitU == 0
13035 && (opcode == BITS4(1,0,1,1)
13036 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
13037 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
13038 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
13039 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
13040 /* Widens, and size refers to the narrowed lanes. */
13041 UInt ks = 3;
13042 switch (opcode) {
13043 case BITS4(1,0,1,1): ks = 0; break;
13044 case BITS4(0,0,1,1): ks = 1; break;
13045 case BITS4(0,1,1,1): ks = 2; break;
13046 default: vassert(0);
13047 }
13048 vassert(ks >= 0 && ks <= 2);
13049 Bool is2 = bitQ == 1;
13050 UInt mm = 32; // invalid
13051 UInt ix = 16; // invalid
13052 switch (size) {
13053 case X00:
13054 return False; // h_b_b[] case is not allowed
13055 case X01:
13056 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13057 case X10:
13058 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13059 case X11:
13060 return False; // q_d_d[] case is not allowed
13061 default:
13062 vassert(0);
13063 }
13064 vassert(mm < 32 && ix < 16);
13065 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
13066 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
13067 newTempsV128_2(&vecN, &vecD);
13068 assign(vecN, getQReg128(nn));
13069 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13070 assign(vecD, getQReg128(dd));
13071 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
13072 is2, size, "mas"[ks],
13073 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
13074 putQReg128(dd, mkexpr(res));
13075 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
13076 updateQCFLAGwithDifference(sat1q, sat1n);
13077 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
13078 updateQCFLAGwithDifference(sat2q, sat2n);
13079 }
13080 const HChar* nm = ks == 0 ? "sqdmull"
13081 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
13082 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
13083 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
13084 HChar ch = size == X01 ? 'h' : 's';
13085 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
13086 nm, is2 ? "2" : "",
13087 nameQReg128(dd), arrWide,
13088 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
13089 return True;
13090 }
13091
13092 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
13093 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
13094 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
13095 UInt mm = 32; // invalid
13096 UInt ix = 16; // invalid
13097 switch (size) {
13098 case X00:
13099 return False; // b case is not allowed
13100 case X01:
13101 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
13102 case X10:
13103 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
13104 case X11:
13105 return False; // q case is not allowed
13106 default:
13107 vassert(0);
13108 }
13109 vassert(mm < 32 && ix < 16);
13110 Bool isR = opcode == BITS4(1,1,0,1);
13111 IRTemp res, sat1q, sat1n, vN, vM;
13112 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
13113 vN = newTempV128();
13114 assign(vN, getQReg128(nn));
13115 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
13116 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
13117 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
13118 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
13119 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
13120 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
13121 const HChar* arr = nameArr_Q_SZ(bitQ, size);
13122 HChar ch = size == X01 ? 'h' : 's';
13123 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
13124 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
13125 return True;
13126 }
13127
13128 return False;
13129 # undef INSN
13130 }
13131
13132
13133 static
13134 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
13135 {
13136 /* 31 23 21 16 11 9 4
13137 0100 1110 size 10100 opcode 10 n d
13138 Decode fields are: size,opcode
13139 Size is always 00 in ARMv8, it appears.
13140 */
13141 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13142 if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
13143 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13144 return False;
13145 }
13146 UInt size = INSN(23,22);
13147 UInt opcode = INSN(16,12);
13148 UInt nn = INSN(9,5);
13149 UInt dd = INSN(4,0);
13150
13151 if (size == BITS2(0,0)
13152 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
13153 /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
13154 /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
13155 Bool isD = opcode == BITS5(0,0,1,0,1);
13156 IRTemp op1 = newTemp(Ity_V128);
13157 IRTemp op2 = newTemp(Ity_V128);
13158 IRTemp xord = newTemp(Ity_V128);
13159 IRTemp res = newTemp(Ity_V128);
13160 void* helper = isD ? &arm64g_dirtyhelper_AESD
13161 : &arm64g_dirtyhelper_AESE;
13162 const HChar* hname = isD ? "arm64g_dirtyhelper_AESD"
13163 : "arm64g_dirtyhelper_AESE";
13164 assign(op1, getQReg128(dd));
13165 assign(op2, getQReg128(nn));
13166 assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
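/* The EOR of Vd and Vn is done here in IR; the dirty helper then
   applies the remaining AES round steps (ShiftRows/SubBytes for AESE,
   their inverses for AESD) to the value, which is passed to the helper
   as two 64-bit halves and returned via VECRET. */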
13167 IRDirty* di
13168 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13169 mkIRExprVec_3(
13170 IRExpr_VECRET(),
13171 unop(Iop_V128HIto64, mkexpr(xord)),
13172 unop(Iop_V128to64, mkexpr(xord)) ) );
13173 stmt(IRStmt_Dirty(di));
13174 putQReg128(dd, mkexpr(res));
13175 DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
13176 nameQReg128(dd), nameQReg128(nn));
13177 return True;
13178 }
13179
13180 if (size == BITS2(0,0)
13181 && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
13182 /* -------- 00,00110: AESMC Vd.16b, Vn.16b -------- */
13183 /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
13184 Bool isI = opcode == BITS5(0,0,1,1,1);
13185 IRTemp src = newTemp(Ity_V128);
13186 IRTemp res = newTemp(Ity_V128);
13187 void* helper = isI ? &arm64g_dirtyhelper_AESIMC
13188 : &arm64g_dirtyhelper_AESMC;
13189 const HChar* hname = isI ? "arm64g_dirtyhelper_AESIMC"
13190 : "arm64g_dirtyhelper_AESMC";
13191 assign(src, getQReg128(nn));
13192 IRDirty* di
13193 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
13194 mkIRExprVec_3(
13195 IRExpr_VECRET(),
13196 unop(Iop_V128HIto64, mkexpr(src)),
13197 unop(Iop_V128to64, mkexpr(src)) ) );
13198 stmt(IRStmt_Dirty(di));
13199 putQReg128(dd, mkexpr(res));
13200 DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
13201 nameQReg128(dd), nameQReg128(nn));
13202 return True;
13203 }
13204
13205 return False;
13206 # undef INSN
13207 }
13208
13209
13210 static
13211 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13212 {
13213 /* 31 28 23 21 20 15 14 11 9 4
13214 0101 1110 sz 0 m 0 opc 00 n d
13215 Decode fields are: sz,opc
13216 */
13217 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13218 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
13219 || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
13220 return False;
13221 }
13222 UInt sz = INSN(23,22);
13223 UInt mm = INSN(20,16);
13224 UInt opc = INSN(14,12);
13225 UInt nn = INSN(9,5);
13226 UInt dd = INSN(4,0);
13227 if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
13228 /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
13229 /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
13230 /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
13231 /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
13232 /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
13233 /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
13234 /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
13235 vassert(opc < 7);
13236 const HChar* inames[7]
13237 = { "sha1c", "sha1p", "sha1m", "sha1su0",
13238 "sha256h", "sha256h2", "sha256su1" };
13239 void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
13240 = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
13241 &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
13242 &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
13243 &arm64g_dirtyhelper_SHA256SU1 };
13244 const HChar* hnames[7]
13245 = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
13246 "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
13247 "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
13248 "arm64g_dirtyhelper_SHA256SU1" };
13249 IRTemp vD = newTemp(Ity_V128);
13250 IRTemp vN = newTemp(Ity_V128);
13251 IRTemp vM = newTemp(Ity_V128);
13252 IRTemp vDhi = newTemp(Ity_I64);
13253 IRTemp vDlo = newTemp(Ity_I64);
13254 IRTemp vNhiPre = newTemp(Ity_I64);
13255 IRTemp vNloPre = newTemp(Ity_I64);
13256 IRTemp vNhi = newTemp(Ity_I64);
13257 IRTemp vNlo = newTemp(Ity_I64);
13258 IRTemp vMhi = newTemp(Ity_I64);
13259 IRTemp vMlo = newTemp(Ity_I64);
13260 assign(vD, getQReg128(dd));
13261 assign(vN, getQReg128(nn));
13262 assign(vM, getQReg128(mm));
13263 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13264 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13265 assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
13266 assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
13267 assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
13268 assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
13269 /* Mask off any bits of the N register operand that aren't actually
13270 needed, so that Memcheck doesn't complain unnecessarily. */
13271 switch (opc) {
13272 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13273 assign(vNhi, mkU64(0));
13274 assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
13275 break;
13276 case BITS3(0,1,1): case BITS3(1,0,0):
13277 case BITS3(1,0,1): case BITS3(1,1,0):
13278 assign(vNhi, mkexpr(vNhiPre));
13279 assign(vNlo, mkexpr(vNloPre));
13280 break;
13281 default:
13282 vassert(0);
13283 }
13284 IRTemp res = newTemp(Ity_V128);
13285 IRDirty* di
13286 = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
13287 mkIRExprVec_7(
13288 IRExpr_VECRET(),
13289 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
13290 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
13291 stmt(IRStmt_Dirty(di));
13292 putQReg128(dd, mkexpr(res));
13293 switch (opc) {
13294 case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
13295 DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
13296 break;
13297 case BITS3(0,1,1): case BITS3(1,1,0):
13298 DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
13299 break;
13300 case BITS3(1,0,0): case BITS3(1,0,1):
13301 DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
13302 break;
13303 default:
13304 vassert(0);
13305 }
13306 return True;
13307 }
13308
13309 return False;
13310 # undef INSN
13311 }
13312
13313
13314 static
13315 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13316 {
13317 /* 31 28 23 21 16 11 9 4
13318 0101 1110 sz 10100 opc 10 n d
13319 Decode fields are: sz,opc
13320 */
13321 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13322 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13323 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13324 return False;
13325 }
13326 UInt sz = INSN(23,22);
13327 UInt opc = INSN(16,12);
13328 UInt nn = INSN(9,5);
13329 UInt dd = INSN(4,0);
13330 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13331 /* -------- 00,00000 SHA1H Sd, Sn -------- */
13332 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
13333 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13334 vassert(opc < 3);
13335 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13336 IRTemp vD = newTemp(Ity_V128);
13337 IRTemp vN = newTemp(Ity_V128);
13338 IRTemp vDhi = newTemp(Ity_I64);
13339 IRTemp vDlo = newTemp(Ity_I64);
13340 IRTemp vNhi = newTemp(Ity_I64);
13341 IRTemp vNlo = newTemp(Ity_I64);
13342 assign(vD, getQReg128(dd));
13343 assign(vN, getQReg128(nn));
13344 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13345 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13346 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13347 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
13348 /* Mask off any bits of the N register operand that aren't actually
13349 needed, so that Memcheck doesn't complain unnecessarily. Also
13350 construct the calls, given that the helper functions don't take
13351 the same number of arguments. */
13352 IRDirty* di = NULL;
13353 IRTemp res = newTemp(Ity_V128);
13354 switch (opc) {
13355 case BITS5(0,0,0,0,0): {
13356 IRExpr* vNloMasked = unop(Iop_32Uto64,
13357 unop(Iop_64to32, mkexpr(vNlo)));
13358 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13359 "arm64g_dirtyhelper_SHA1H",
13360 &arm64g_dirtyhelper_SHA1H,
13361 mkIRExprVec_3(
13362 IRExpr_VECRET(),
13363 mkU64(0), vNloMasked) );
13364 break;
13365 }
13366 case BITS5(0,0,0,0,1):
13367 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13368 "arm64g_dirtyhelper_SHA1SU1",
13369 &arm64g_dirtyhelper_SHA1SU1,
13370 mkIRExprVec_5(
13371 IRExpr_VECRET(),
13372 mkexpr(vDhi), mkexpr(vDlo),
13373 mkexpr(vNhi), mkexpr(vNlo)) );
13374 break;
13375 case BITS5(0,0,0,1,0):
13376 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13377 "arm64g_dirtyhelper_SHA256SU0",
13378 &arm64g_dirtyhelper_SHA256SU0,
13379 mkIRExprVec_5(
13380 IRExpr_VECRET(),
13381 mkexpr(vDhi), mkexpr(vDlo),
13382 mkexpr(vNhi), mkexpr(vNlo)) );
13383 break;
13384 default:
13385 vassert(0);
13386 }
13387 stmt(IRStmt_Dirty(di));
13388 putQReg128(dd, mkexpr(res));
13389 switch (opc) {
13390 case BITS5(0,0,0,0,0):
13391 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13392 break;
13393 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13394 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13395 break;
13396 default:
13397 vassert(0);
13398 }
13399 return True;
13400 }
13401
13402 return False;
13403 # undef INSN
13404 }
13405
13406
13407 static
13408 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13409 {
13410 /* 31 28 23 21 20 15 13 9 4
13411 000 11110 ty 1 m op 1000 n opcode2
13412 The first 3 bits are really "M 0 S", but M and S are always zero.
13413 Decode fields are: ty,op,opcode2
13414 */
13415 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13416 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13417 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13418 return False;
13419 }
13420 UInt ty = INSN(23,22);
13421 UInt mm = INSN(20,16);
13422 UInt op = INSN(15,14);
13423 UInt nn = INSN(9,5);
13424 UInt opcode2 = INSN(4,0);
13425 vassert(ty < 4);
13426
13427 if (ty <= X01 && op == X00
13428 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13429 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
13430 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
13431 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
13432 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13433 /* 31 23 20 15 9 4
13434 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
13435 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
13436 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
13437 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
13438
13439 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
13440 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
13441 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
13442 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
13443
13444 FCMPE generates Invalid Operation exn if either arg is any kind
13445 of NaN. FCMP generates Invalid Operation exn if either arg is a
13446 signalling NaN. We ignore this detail here and produce the same
13447 IR for both.
13448 */
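/* The comparison result is converted to the NZCV values the ARM ARM
   specifies for FCMP: 0110 for equal, 1000 for less than, 0010 for
   greater than, and 0011 for unordered.  Shifting that 4-bit value
   left by 28 places it in bits 31:28, where setFlags_COPY expects the
   flags to live. */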
13449 Bool isD = (ty & 1) == 1;
13450 Bool isCMPE = (opcode2 & 16) == 16;
13451 Bool cmpZero = (opcode2 & 8) == 8;
13452 IRType ity = isD ? Ity_F64 : Ity_F32;
13453 Bool valid = True;
13454 if (cmpZero && mm != 0) valid = False;
13455 if (valid) {
13456 IRTemp argL = newTemp(ity);
13457 IRTemp argR = newTemp(ity);
13458 IRTemp irRes = newTemp(Ity_I32);
13459 assign(argL, getQRegLO(nn, ity));
13460 assign(argR,
13461 cmpZero
13462 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13463 : getQRegLO(mm, ity));
13464 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13465 mkexpr(argL), mkexpr(argR)));
13466 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13467 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13468 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
13469 setFlags_COPY(nzcv_28x0);
13470 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13471 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13472 return True;
13473 }
13474 return False;
13475 }
13476
13477 return False;
13478 # undef INSN
13479 }
13480
13481
13482 static
13483 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13484 {
13485 /* 31 28 23 21 20 15 11 9 4 3
13486 000 11110 ty 1 m cond 01 n op nzcv
13487 The first 3 bits are really "M 0 S", but M and S are always zero.
13488 Decode fields are: ty,op
13489 */
13490 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13491 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13492 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13493 return False;
13494 }
13495 UInt ty = INSN(23,22);
13496 UInt mm = INSN(20,16);
13497 UInt cond = INSN(15,12);
13498 UInt nn = INSN(9,5);
13499 UInt op = INSN(4,4);
13500 UInt nzcv = INSN(3,0);
13501 vassert(ty < 4 && op <= 1);
13502
13503 if (ty <= BITS2(0,1)) {
13504 /* -------- 00,0 FCCMP s_s -------- */
13505 /* -------- 00,1 FCCMPE s_s -------- */
13506 /* -------- 01,0 FCCMP d_d -------- */
13507 /* -------- 01,1 FCCMPE d_d -------- */
13508
13509 /* FCCMPE generates Invalid Operation exn if either arg is any kind
13510 of NaN. FCCMP generates Invalid Operation exn if either arg is a
13511 signalling NaN. We ignore this detail here and produce the same
13512 IR for both.
13513 */
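/* Conditional-compare semantics: if the condition holds, NZCV is set
   from the FP comparison of Fn and Fm; otherwise NZCV is set to the
   literal #nzcv field from the instruction.  The ITE below selects
   between the two, with both alternatives already positioned in bits
   31:28. */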
13514 Bool isD = (ty & 1) == 1;
13515 Bool isCMPE = op == 1;
13516 IRType ity = isD ? Ity_F64 : Ity_F32;
13517 IRTemp argL = newTemp(ity);
13518 IRTemp argR = newTemp(ity);
13519 IRTemp irRes = newTemp(Ity_I32);
13520 assign(argL, getQRegLO(nn, ity));
13521 assign(argR, getQRegLO(mm, ity));
13522 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13523 mkexpr(argL), mkexpr(argR)));
13524 IRTemp condT = newTemp(Ity_I1);
13525 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13526 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13527
13528 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13529 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
13530
13531 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
13532
13533 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13534 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
13535 mkexpr(nzcvT_28x0), nzcvF_28x0));
13536 setFlags_COPY(nzcv_28x0);
13537 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
13538 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
13539 return True;
13540 }
13541
13542 return False;
13543 # undef INSN
13544 }
13545
13546
13547 static
13548 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
13549 {
13550 /* 31 23 21 20 15 11 9 5
13551 000 11110 ty 1 m cond 11 n d
13552 The first 3 bits are really "M 0 S", but M and S are always zero.
13553 Decode fields: ty
13554 */
13555 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13556 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
13557 || INSN(11,10) != BITS2(1,1)) {
13558 return False;
13559 }
13560 UInt ty = INSN(23,22);
13561 UInt mm = INSN(20,16);
13562 UInt cond = INSN(15,12);
13563 UInt nn = INSN(9,5);
13564 UInt dd = INSN(4,0);
13565 if (ty <= X01) {
13566 /* -------- 00: FCSEL s_s -------- */
13567 /* -------- 01: FCSEL d_d -------- */
13568 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13569 IRTemp srcT = newTemp(ity);
13570 IRTemp srcF = newTemp(ity);
13571 IRTemp res = newTemp(ity);
13572 assign(srcT, getQRegLO(nn, ity));
13573 assign(srcF, getQRegLO(mm, ity));
13574 assign(res, IRExpr_ITE(
13575 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
13576 mkexpr(srcT), mkexpr(srcF)));
13577 putQReg128(dd, mkV128(0x0000));
13578 putQRegLO(dd, mkexpr(res));
13579 DIP("fcsel %s, %s, %s, %s\n",
13580 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
13581 nameCC(cond));
13582 return True;
13583 }
13584 return False;
13585 # undef INSN
13586 }
13587
13588
13589 static
13590 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
13591 {
13592 /* 31 28 23 21 20 14 9 4
13593 000 11110 ty 1 opcode 10000 n d
13594 The first 3 bits are really "M 0 S", but M and S are always zero.
13595 Decode fields: ty,opcode
13596 */
13597 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13598 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13599 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13600 return False;
13601 }
13602 UInt ty = INSN(23,22);
13603 UInt opcode = INSN(20,15);
13604 UInt nn = INSN(9,5);
13605 UInt dd = INSN(4,0);
13606
13607 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13608 /* -------- 0x,000000: FMOV d_d, s_s -------- */
13609 /* -------- 0x,000001: FABS d_d, s_s -------- */
13610 /* -------- 0x,000010: FNEG d_d, s_s -------- */
13611 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13612 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13613 IRTemp src = newTemp(ity);
13614 IRTemp res = newTemp(ity);
13615 const HChar* nm = "??";
13616 assign(src, getQRegLO(nn, ity));
13617 switch (opcode) {
13618 case BITS6(0,0,0,0,0,0):
13619 nm = "fmov"; assign(res, mkexpr(src)); break;
13620 case BITS6(0,0,0,0,0,1):
13621 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13622 case BITS6(0,0,0,0,1,0):
13623 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13624 case BITS6(0,0,0,0,1,1):
13625 nm = "fsqrt";
13626 assign(res, binop(mkSQRTF(ity),
13627 mkexpr(mk_get_IR_rounding_mode()),
13628 mkexpr(src))); break;
13629 default:
13630 vassert(0);
13631 }
13632 putQReg128(dd, mkV128(0x0000));
13633 putQRegLO(dd, mkexpr(res));
13634 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13635 return True;
13636 }
13637
13638 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13639 || opcode == BITS6(0,0,0,1,0,1)))
13640 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13641 || opcode == BITS6(0,0,0,1,0,1)))
13642 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13643 || opcode == BITS6(0,0,0,1,0,0)))) {
13644 /* -------- 11,000100: FCVT s_h -------- */
13645 /* -------- 11,000101: FCVT d_h -------- */
13646 /* -------- 00,000111: FCVT h_s -------- */
13647 /* -------- 00,000101: FCVT d_s -------- */
13648 /* -------- 01,000111: FCVT h_d -------- */
13649 /* -------- 01,000100: FCVT s_d -------- */
13650 /* 31 23 21 16 14 9 4
13651 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13652 --------- 11 ----- 01 --------- FCVT Dd, Hn
13653 --------- 00 ----- 11 --------- FCVT Hd, Sn
13654 --------- 00 ----- 01 --------- FCVT Dd, Sn
13655 --------- 01 ----- 11 --------- FCVT Hd, Dn
13656 --------- 01 ----- 00 --------- FCVT Sd, Dn
13657 Rounding, when dst is smaller than src, is per the FPCR.
13658 */
13659 UInt b2322 = ty;
13660 UInt b1615 = opcode & BITS2(1,1);
13661 switch ((b2322 << 2) | b1615) {
13662 case BITS4(0,0,0,1): // S -> D
13663 case BITS4(1,1,0,1): { // H -> D
13664 Bool srcIsH = b2322 == BITS2(1,1);
13665 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13666 IRTemp res = newTemp(Ity_F64);
13667 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13668 getQRegLO(nn, srcTy)));
13669 putQReg128(dd, mkV128(0x0000));
13670 putQRegLO(dd, mkexpr(res));
13671 DIP("fcvt %s, %s\n",
13672 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13673 return True;
13674 }
13675 case BITS4(0,1,0,0): // D -> S
13676 case BITS4(0,1,1,1): { // D -> H
13677 Bool dstIsH = b1615 == BITS2(1,1);
13678 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13679 IRTemp res = newTemp(dstTy);
13680 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13681 mkexpr(mk_get_IR_rounding_mode()),
13682 getQRegLO(nn, Ity_F64)));
13683 putQReg128(dd, mkV128(0x0000));
13684 putQRegLO(dd, mkexpr(res));
13685 DIP("fcvt %s, %s\n",
13686 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13687 return True;
13688 }
13689 case BITS4(0,0,1,1): // S -> H
13690 case BITS4(1,1,0,0): { // H -> S
13691 Bool toH = b1615 == BITS2(1,1);
13692 IRType srcTy = toH ? Ity_F32 : Ity_F16;
13693 IRType dstTy = toH ? Ity_F16 : Ity_F32;
13694 IRTemp res = newTemp(dstTy);
13695 if (toH) {
13696 assign(res, binop(Iop_F32toF16,
13697 mkexpr(mk_get_IR_rounding_mode()),
13698 getQRegLO(nn, srcTy)));
13699
13700 } else {
13701 assign(res, unop(Iop_F16toF32,
13702 getQRegLO(nn, srcTy)));
13703 }
13704 putQReg128(dd, mkV128(0x0000));
13705 putQRegLO(dd, mkexpr(res));
13706 DIP("fcvt %s, %s\n",
13707 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13708 return True;
13709 }
13710 default:
13711 break;
13712 }
13713 /* else unhandled */
13714 return False;
13715 }
13716
13717 if (ty <= X01
13718 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13719 && opcode != BITS6(0,0,1,1,0,1)) {
13720 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13721 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13722 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13723 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13724 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13725 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13726 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13727 /* 31 23 21 17 14 9 4
13728 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
13729 rm
13730 x==0 => S-registers, x==1 => D-registers
13731 rm (17:15) encodings:
13732 111 per FPCR (FRINTI)
13733 001 +inf (FRINTP)
13734 010 -inf (FRINTM)
13735 011 zero (FRINTZ)
13736 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
13737 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
13738 110 per FPCR + "exact = TRUE" (FRINTX)
13739 101 unallocated
13740 */
13741 Bool isD = (ty & 1) == 1;
13742 UInt rm = opcode & BITS6(0,0,0,1,1,1);
13743 IRType ity = isD ? Ity_F64 : Ity_F32;
13744 IRExpr* irrmE = NULL;
13745 UChar ch = '?';
13746 switch (rm) {
13747 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13748 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13749 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13750 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13751 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13752 // FRINTX ("round to integral exact") rounds per the FPCR like FRINTI,
13753 // but also signals the Inexact exception if the result differs from
// the operand.  FP exceptions aren't modelled here, so FRINTI behaviour
// has to do.
13754 case BITS3(1,1,0):
13755 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13756 case BITS3(1,1,1):
13757 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13758 // The following is a kludge. There's no Irrm_ value to represent
13759 // this ("to nearest, with ties to even")
13760 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13761 default: break;
13762 }
13763 if (irrmE) {
13764 IRTemp src = newTemp(ity);
13765 IRTemp dst = newTemp(ity);
13766 assign(src, getQRegLO(nn, ity));
13767 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13768 irrmE, mkexpr(src)));
13769 putQReg128(dd, mkV128(0x0000));
13770 putQRegLO(dd, mkexpr(dst));
13771 DIP("frint%c %s, %s\n",
13772 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13773 return True;
13774 }
13775 return False;
13776 }
13777
13778 return False;
13779 # undef INSN
13780 }
13781
13782
13783 static
13784 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13785 {
13786 /* 31 28 23 21 20 15 11 9 4
13787 000 11110 ty 1 m opcode 10 n d
13788 The first 3 bits are really "M 0 S", but M and S are always zero.
13789 Decode fields: ty, opcode
13790 */
13791 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13792 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13793 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13794 return False;
13795 }
13796 UInt ty = INSN(23,22);
13797 UInt mm = INSN(20,16);
13798 UInt opcode = INSN(15,12);
13799 UInt nn = INSN(9,5);
13800 UInt dd = INSN(4,0);
13801
13802 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13803 /* ------- 0x,0000: FMUL d_d, s_s ------- */
13804 /* ------- 0x,0001: FDIV d_d, s_s ------- */
13805 /* ------- 0x,0010: FADD d_d, s_s ------- */
13806 /* ------- 0x,0011: FSUB d_d, s_s ------- */
13807 /* ------- 0x,0100: FMAX d_d, s_s ------- */
13808 /* ------- 0x,0101: FMIN d_d, s_s ------- */
13809 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13810 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13811 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13812 IROp iop = Iop_INVALID;
13813 const HChar* nm = "???";
13814 switch (opcode) {
13815 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13816 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13817 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13818 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13819 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13820 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13821 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13822 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13823 default: vassert(0);
13824 }
13825 if (opcode <= BITS4(0,0,1,1)) {
13826 // This is really not good code. TODO: avoid width-changing
13827 IRTemp res = newTemp(ity);
13828 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13829 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13830 putQReg128(dd, mkV128(0));
13831 putQRegLO(dd, mkexpr(res));
13832 } else {
13833 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13834 binop(iop, getQReg128(nn), getQReg128(mm))));
13835 }
13836 DIP("%s %s, %s, %s\n",
13837 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13838 return True;
13839 }
13840
13841 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13842 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13843 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13844 IROp iop = mkMULF(ity);
13845 IROp iopn = mkNEGF(ity);
13846 const HChar* nm = "fnmul";
13847 IRExpr* resE = unop(iopn,
13848 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13849 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13850 IRTemp res = newTemp(ity);
13851 assign(res, resE);
13852 putQReg128(dd, mkV128(0));
13853 putQRegLO(dd, mkexpr(res));
13854 DIP("%s %s, %s, %s\n",
13855 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13856 return True;
13857 }
13858
13859 return False;
13860 # undef INSN
13861 }
13862
13863
13864 static
13865 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13866 {
13867 /* 31 28 23 21 20 15 14 9 4
13868 000 11111 ty o1 m o0 a n d
13869 The first 3 bits are really "M 0 S", but M and S are always zero.
13870 Decode fields: ty,o1,o0
13871 */
13872 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13873 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13874 return False;
13875 }
13876 UInt ty = INSN(23,22);
13877 UInt bitO1 = INSN(21,21);
13878 UInt mm = INSN(20,16);
13879 UInt bitO0 = INSN(15,15);
13880 UInt aa = INSN(14,10);
13881 UInt nn = INSN(9,5);
13882 UInt dd = INSN(4,0);
13883 vassert(ty < 4);
13884
13885 if (ty <= X01) {
13886 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13887 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13888 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13889 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13890 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13891 /* 31 22 20 15 14 9 4 ix
13892 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13893 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13894 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13895 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13896 where Fx=Dx when sz=1, Fx=Sx when sz=0
13897
13898 -----SPEC------ ----IMPL----
13899 fmadd a + n * m a + n * m
13900 fmsub a + (-n) * m a - n * m
13901 fnmadd (-a) + (-n) * m -(a + n * m)
13902 fnmsub (-a) + n * m -(a - n * m)
13903 */
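/* FIXME: as implemented, the multiply and the add/sub are rounded
   separately, so the result can differ from a true fused multiply-add
   in the last mantissa bit (double rounding).  The proper fix is to
   use the IR fused multiply-add primops (Iop_MAddF64/Iop_MSubF64 and
   the F32 variants) instead. */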
13904 Bool isD = (ty & 1) == 1;
13905 UInt ix = (bitO1 << 1) | bitO0;
13906 IRType ity = isD ? Ity_F64 : Ity_F32;
13907 IROp opADD = mkADDF(ity);
13908 IROp opSUB = mkSUBF(ity);
13909 IROp opMUL = mkMULF(ity);
13910 IROp opNEG = mkNEGF(ity);
13911 IRTemp res = newTemp(ity);
13912 IRExpr* eA = getQRegLO(aa, ity);
13913 IRExpr* eN = getQRegLO(nn, ity);
13914 IRExpr* eM = getQRegLO(mm, ity);
13915 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13916 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13917 switch (ix) {
13918 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13919 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13920 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13921 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13922 default: vassert(0);
13923 }
13924 putQReg128(dd, mkV128(0x0000));
13925 putQRegLO(dd, mkexpr(res));
13926 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13927 DIP("%s %s, %s, %s, %s\n",
13928 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13929 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13930 return True;
13931 }
13932
13933 return False;
13934 # undef INSN
13935 }
13936
13937
13938 static
13939 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13940 {
13941 /* 31 28 23 21 20 12 9 4
13942 000 11110 ty 1 imm8 100 imm5 d
13943 The first 3 bits are really "M 0 S", but M and S are always zero.
13944 */
13945 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13946 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13947 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13948 return False;
13949 }
13950 UInt ty = INSN(23,22);
13951 UInt imm8 = INSN(20,13);
13952 UInt imm5 = INSN(9,5);
13953 UInt dd = INSN(4,0);
13954
13955 /* ------- 00,00000: FMOV s_imm ------- */
13956 /* ------- 01,00000: FMOV d_imm ------- */
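/* Per the ARM pseudocode VFPExpandImm, imm8 expands to a value of the
   form +/- n/16 * 2^r with n in 16..31 and r in -3..+4, i.e. one of
   256 small constants such as 0.125, 0.5, 1.0, 2.0 or 31.0. */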
13957 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13958 Bool isD = (ty & 1) == 1;
13959 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
13960 if (!isD) {
13961 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13962 }
13963 putQReg128(dd, mkV128(0));
13964 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13965 DIP("fmov %s, #0x%llx\n",
13966 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13967 return True;
13968 }
13969
13970 return False;
13971 # undef INSN
13972 }
13973
13974
13975 static
13976 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13977 {
13979 /* 31 30 29 28 23 21 20 18 15 9 4
13980 sf 0 0 11110 type 0 rmode opcode scale n d
13981 The first 3 bits are really "sf 0 S", but S is always zero.
13982 Decode fields: sf,type,rmode,opcode
13983 */
13984 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13985 if (INSN(30,29) != BITS2(0,0)
13986 || INSN(28,24) != BITS5(1,1,1,1,0)
13987 || INSN(21,21) != 0) {
13988 return False;
13989 }
13990 UInt bitSF = INSN(31,31);
13991 UInt ty = INSN(23,22); // type
13992 UInt rm = INSN(20,19); // rmode
13993 UInt op = INSN(18,16); // opcode
13994 UInt sc = INSN(15,10); // scale
13995 UInt nn = INSN(9,5);
13996 UInt dd = INSN(4,0);
13997
13998 if (ty <= X01 && rm == X11
13999 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
14000 /* -------- (ix) sf ty rm opc -------- */
14001 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
14002 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
14003 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
14004 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
14005
14006 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
14007 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
14008 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
14009 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
14010 Bool isI64 = bitSF == 1;
14011 Bool isF64 = (ty & 1) == 1;
14012 Bool isU = (op & 1) == 1;
14013 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14014
14015 Int fbits = 64 - sc;
14016 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
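/* The fixed-point conversion is implemented as scale-then-convert:
   the FP value is multiplied by 2^fbits and the product is converted
   to an integer with round-towards-zero.  E.g. FCVTZS Wd, Sn, #8
   produces trunc(Sn * 256). */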
14017
14018 Double scale = two_to_the_plus(fbits);
14019 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14020 : IRExpr_Const(IRConst_F32( (Float)scale ));
14021 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14022
14023 const IROp ops[8]
14024 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
14025 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
14026 IRTemp irrm = newTemp(Ity_I32);
14027 assign(irrm, mkU32(Irrm_ZERO));
14028
14029 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
14030 IRExpr* res = binop(ops[ix], mkexpr(irrm),
14031 triop(opMUL, mkexpr(irrm), src, scaleE));
14032 putIRegOrZR(isI64, dd, res);
14033
14034 DIP("fcvtz%c %s, %s, #%d\n",
14035 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
14036 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
14037 return True;
14038 }
14039
14040 /* ------ sf,ty,rm,opc ------ */
14041 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
14042 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
14043 /* (ix) sf S 28 ty rm opc 15 9 4
14044 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
14045 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
14046 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
14047 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
14048
14049 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
14050 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
14051 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
14052 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
14053
14054 These are signed/unsigned conversion from integer registers to
14055 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
14056 scaled per |scale|.
14057 */
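/* Implemented as convert-then-scale: the integer is converted to FP
   (rounded per the FPCR in the cases that can be inexact) and then
   multiplied by 2^-fbits.  E.g. UCVTF Sd, Wn, #4 produces Wn / 16.0. */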
14058 if (ty <= X01 && rm == X00
14059 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
14060 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
14061 Bool isI64 = bitSF == 1;
14062 Bool isF64 = (ty & 1) == 1;
14063 Bool isU = (op & 1) == 1;
14064 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14065
14066 Int fbits = 64 - sc;
14067 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
14068
14069 Double scale = two_to_the_minus(fbits);
14070 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
14071 : IRExpr_Const(IRConst_F32( (Float)scale ));
14072 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
14073
14074 const IROp ops[8]
14075 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14076 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14077 IRExpr* src = getIRegOrZR(isI64, nn);
14078 IRExpr* res = (isF64 && !isI64)
14079 ? unop(ops[ix], src)
14080 : binop(ops[ix],
14081 mkexpr(mk_get_IR_rounding_mode()), src);
14082 putQReg128(dd, mkV128(0));
14083 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
14084
14085 DIP("%ccvtf %s, %s, #%d\n",
14086 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14087 nameIRegOrZR(isI64, nn), fbits);
14088 return True;
14089 }
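
   /* Illustrative only: for a hypothetical UCVTF d0, w1, #8 we would
      have sf=0, ty=01, op=011 and scale=56, so fbits = 64-56 = 8, and
      the block above generates (roughly)

         putQReg128(0, mkV128(0));
         putQRegLO(0, triop(Iop_MulF64, mkU32(Irrm_NEAREST),
                            unop(Iop_I32UtoF64, getIReg32orZR(1)),
                            IRExpr_Const(IRConst_F64(0.00390625))));

      i.e. an exact widening conversion followed by a post-scale of
      2^-8. */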
14090
14091 return False;
14092 # undef INSN
14093 }
14094
14095
14096 static
14097 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
14098 {
14099 /* 31 30 29 28 23 21 20 18 15 9 4
14100 sf 0 0 11110 type 1 rmode opcode 000000 n d
14101 The first 3 bits are really "sf 0 S", but S is always zero.
14102 Decode fields: sf,type,rmode,opcode
14103 */
14104 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14105 if (INSN(30,29) != BITS2(0,0)
14106 || INSN(28,24) != BITS5(1,1,1,1,0)
14107 || INSN(21,21) != 1
14108 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
14109 return False;
14110 }
14111 UInt bitSF = INSN(31,31);
14112 UInt ty = INSN(23,22); // type
14113 UInt rm = INSN(20,19); // rmode
14114 UInt op = INSN(18,16); // opcode
14115 UInt nn = INSN(9,5);
14116 UInt dd = INSN(4,0);
14117
14118 // op = 000, 001
14119 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
14120 /* 30 23 20 18 15 9 4
14121 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
14122 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
14123 ---------------- 01 -------------- FCVTP-------- (round to +inf)
14124 ---------------- 10 -------------- FCVTM-------- (round to -inf)
14125 ---------------- 11 -------------- FCVTZ-------- (round to zero)
14126 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
14127 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
14128
14129 Rd is Xd when sf==1, Wd when sf==0
14130 Fn is Dn when x==1, Sn when x==0
14131 20:19 carry the rounding mode, using the same encoding as FPCR
14132 */
14133 if (ty <= X01
14134 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
14135 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
14136 )
14137 ) {
14138 Bool isI64 = bitSF == 1;
14139 Bool isF64 = (ty & 1) == 1;
14140 Bool isU = (op & 1) == 1;
14141 /* Decide on the IR rounding mode to use. */
14142 IRRoundingMode irrm = 8; /*impossible*/
14143 HChar ch = '?';
14144 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
14145 switch (rm) {
14146 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
14147 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
14148 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
14149 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
14150 default: vassert(0);
14151 }
14152 } else {
14153 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
14154 switch (rm) {
14155 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
14156 default: vassert(0);
14157 }
14158 }
14159 vassert(irrm != 8);
14160 /* Decide on the conversion primop, based on the source size,
14161 dest size and signedness (8 possibilities). Case coding:
14162 F32 ->s I32 0
14163 F32 ->u I32 1
14164 F32 ->s I64 2
14165 F32 ->u I64 3
14166 F64 ->s I32 4
14167 F64 ->u I32 5
14168 F64 ->s I64 6
14169 F64 ->u I64 7
14170 */
14171 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
14172 vassert(ix < 8);
14173 const IROp iops[8]
14174 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
14175 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
14176 IROp iop = iops[ix];
14177 // A bit of ATCery: bounce all cases we haven't seen an example of.
14178 if (/* F32toI32S */
14179 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
14180 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
14181 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
14182 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
14183 /* F32toI32U */
14184 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
14185 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
14186 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
14187 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
14188 /* F32toI64S */
14189 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
14190 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
14191 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
14192 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
14193 /* F32toI64U */
14194 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
14195 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
14196 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
14197 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
14198 /* F64toI32S */
14199 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
14200 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
14201 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
14202 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
14203 /* F64toI32U */
14204 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
14205 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
14206 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
14207 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
14208 /* F64toI64S */
14209 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
14210 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
14211 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
14212 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
14213 /* F64toI64U */
14214 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
14215 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
14216 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
14217 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
14218 ) {
14219 /* validated */
14220 } else {
14221 return False;
14222 }
14223 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
14224 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
14225 IRTemp src = newTemp(srcTy);
14226 IRTemp dst = newTemp(dstTy);
14227 assign(src, getQRegLO(nn, srcTy));
14228 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
14229 putIRegOrZR(isI64, dd, mkexpr(dst));
14230 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
14231 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
14232 return True;
14233 }
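
   /* Illustrative only: a hypothetical FCVTMS x3, d7 has rm=10 and
      op=000, so ch='m', irrm=Irrm_NegINF and iop=Iop_F64toI64S; the
      block above then generates (roughly)

         putIRegOrZR(True, 3,
            binop(Iop_F64toI64S, mkU32(Irrm_NegINF),
                  getQRegLO(7, Ity_F64)));

      i.e. round towards -infinity and write the 64-bit result to x3. */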
14234
14235 // op = 010, 011
14236 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
14237 /* (ix) sf S 28 ty rm op 15 9 4
14238 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
14239 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
14240 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
14241 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
14242
14243 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
14244 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
14245 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
14246 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
14247
14248      These are signed/unsigned conversions from integer registers to
14249 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
14250 */
14251 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
14252 Bool isI64 = bitSF == 1;
14253 Bool isF64 = (ty & 1) == 1;
14254 Bool isU = (op & 1) == 1;
14255 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14256 const IROp ops[8]
14257 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14258 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14259 IRExpr* src = getIRegOrZR(isI64, nn);
14260 IRExpr* res = (isF64 && !isI64)
14261 ? unop(ops[ix], src)
14262 : binop(ops[ix],
14263 mkexpr(mk_get_IR_rounding_mode()), src);
14264 putQReg128(dd, mkV128(0));
14265 putQRegLO(dd, res);
14266 DIP("%ccvtf %s, %s\n",
14267 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14268 nameIRegOrZR(isI64, nn));
14269 return True;
14270 }
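
   /* Illustrative only: a hypothetical SCVTF d1, x2 has sf=1, ty=01,
      op=010, giving ix=3 and hence Iop_I64StoF64; since that
      conversion can round, the block above generates (roughly)

         putQReg128(1, mkV128(0));
         putQRegLO(1, binop(Iop_I64StoF64,
                            mkexpr(mk_get_IR_rounding_mode()),
                            getIReg64orZR(2)));

      whereas the exact W-to-D cases use the unop form instead. */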
14271
14272 // op = 110, 111
14273 /* -------- FMOV (general) -------- */
14274 /* case sf S ty rm op 15 9 4
14275 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
14276 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
14277 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
14278
14279 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
14280 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
14281 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
14282 */
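   /* Illustrative only: these are raw bit moves, with no numeric
      conversion; a hypothetical FMOV x5, d9 is case (5) below and
      simply does

         putIReg64orZR(5, getQRegLO(9, Ity_I64));

      that is, the 64 bits of d9 are reinterpreted as an integer. */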
14283 if (1) {
14284 UInt ix = 0; // case
14285 if (bitSF == 0) {
14286 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14287 ix = 1;
14288 else
14289 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14290 ix = 4;
14291 } else {
14292 vassert(bitSF == 1);
14293 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14294 ix = 2;
14295 else
14296 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14297 ix = 5;
14298 else
14299 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
14300 ix = 3;
14301 else
14302 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
14303 ix = 6;
14304 }
14305 if (ix > 0) {
14306 switch (ix) {
14307 case 1:
14308 putQReg128(dd, mkV128(0));
14309 putQRegLO(dd, getIReg32orZR(nn));
14310 DIP("fmov s%u, w%u\n", dd, nn);
14311 break;
14312 case 2:
14313 putQReg128(dd, mkV128(0));
14314 putQRegLO(dd, getIReg64orZR(nn));
14315 DIP("fmov d%u, x%u\n", dd, nn);
14316 break;
14317 case 3:
14318 putQRegHI64(dd, getIReg64orZR(nn));
14319 DIP("fmov v%u.d[1], x%u\n", dd, nn);
14320 break;
14321 case 4:
14322 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
14323 DIP("fmov w%u, s%u\n", dd, nn);
14324 break;
14325 case 5:
14326 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
14327 DIP("fmov x%u, d%u\n", dd, nn);
14328 break;
14329 case 6:
14330 putIReg64orZR(dd, getQRegHI64(nn));
14331 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
14332 break;
14333 default:
14334 vassert(0);
14335 }
14336 return True;
14337 }
14338 /* undecodable; fall through */
14339 }
14340
14341 return False;
14342 # undef INSN
14343 }
14344
14345
14346 static
14347 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
14348 {
14349 Bool ok;
14350 ok = dis_AdvSIMD_EXT(dres, insn);
14351 if (UNLIKELY(ok)) return True;
14352 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
14353 if (UNLIKELY(ok)) return True;
14354 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
14355 if (UNLIKELY(ok)) return True;
14356 ok = dis_AdvSIMD_across_lanes(dres, insn);
14357 if (UNLIKELY(ok)) return True;
14358 ok = dis_AdvSIMD_copy(dres, insn);
14359 if (UNLIKELY(ok)) return True;
14360 ok = dis_AdvSIMD_modified_immediate(dres, insn);
14361 if (UNLIKELY(ok)) return True;
14362 ok = dis_AdvSIMD_scalar_copy(dres, insn);
14363 if (UNLIKELY(ok)) return True;
14364 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
14365 if (UNLIKELY(ok)) return True;
14366 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
14367 if (UNLIKELY(ok)) return True;
14368 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
14369 if (UNLIKELY(ok)) return True;
14370 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
14371 if (UNLIKELY(ok)) return True;
14372 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
14373 if (UNLIKELY(ok)) return True;
14374 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
14375 if (UNLIKELY(ok)) return True;
14376 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
14377 if (UNLIKELY(ok)) return True;
14378 ok = dis_AdvSIMD_three_different(dres, insn);
14379 if (UNLIKELY(ok)) return True;
14380 ok = dis_AdvSIMD_three_same(dres, insn);
14381 if (UNLIKELY(ok)) return True;
14382 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
14383 if (UNLIKELY(ok)) return True;
14384 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
14385 if (UNLIKELY(ok)) return True;
14386 ok = dis_AdvSIMD_crypto_aes(dres, insn);
14387 if (UNLIKELY(ok)) return True;
14388 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
14389 if (UNLIKELY(ok)) return True;
14390 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
14391 if (UNLIKELY(ok)) return True;
14392 ok = dis_AdvSIMD_fp_compare(dres, insn);
14393 if (UNLIKELY(ok)) return True;
14394 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
14395 if (UNLIKELY(ok)) return True;
14396 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
14397 if (UNLIKELY(ok)) return True;
14398 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
14399 if (UNLIKELY(ok)) return True;
14400 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
14401 if (UNLIKELY(ok)) return True;
14402 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
14403 if (UNLIKELY(ok)) return True;
14404 ok = dis_AdvSIMD_fp_immediate(dres, insn);
14405 if (UNLIKELY(ok)) return True;
14406 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
14407 if (UNLIKELY(ok)) return True;
14408 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
14409 if (UNLIKELY(ok)) return True;
14410 return False;
14411 }
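
/* Purely illustrative, not part of the decoder: the try-each-decoder
   chain in dis_ARM64_simd_and_fp could equally be written as a table
   walk over function pointers, e.g. (hypothetical helper):

      typedef Bool (*SIMDDecoder)(DisResult*, UInt);
      static Bool try_each ( const SIMDDecoder* ds, UInt nDs,
                             DisResult* dres, UInt insn )
      {
         UInt i;
         for (i = 0; i < nDs; i++) {
            if (ds[i](dres, insn)) return True;
         }
         return False;
      }

   One reason to prefer the explicit chain is that each sub-decoder
   call is easy to find and to disable individually when debugging. */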
14412
14413
14414 /*------------------------------------------------------------*/
14415 /*--- Disassemble a single ARM64 instruction ---*/
14416 /*------------------------------------------------------------*/
14417
14418 /* Disassemble a single ARM64 instruction into IR. The instruction
14419    is located at |guest_instr| and has guest IP of
14420 |guest_PC_curr_instr|, which will have been set before the call
14421 here. Returns True iff the instruction was decoded, in which case
14422 *dres will be set accordingly, or False, in which case *dres should
14423 be ignored by the caller. */
14424
14425 static
14426 Bool disInstr_ARM64_WRK (
14427 /*MB_OUT*/DisResult* dres,
14428 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
14429 Bool resteerCisOk,
14430 void* callback_opaque,
14431 const UChar* guest_instr,
14432 const VexArchInfo* archinfo,
14433 const VexAbiInfo* abiinfo
14434 )
14435 {
14436 // A macro to fish bits out of 'insn'.
14437 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14438
14439 //ZZ DisResult dres;
14440 //ZZ UInt insn;
14441 //ZZ //Bool allow_VFP = False;
14442 //ZZ //UInt hwcaps = archinfo->hwcaps;
14443 //ZZ IRTemp condT; /* :: Ity_I32 */
14444 //ZZ UInt summary;
14445 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
14446 //ZZ
14447 //ZZ /* What insn variants are we supporting today? */
14448 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14449 //ZZ // etc etc
14450
14451 /* Set result defaults. */
14452 dres->whatNext = Dis_Continue;
14453 dres->len = 4;
14454 dres->continueAt = 0;
14455 dres->jk_StopHere = Ijk_INVALID;
14456 dres->hint = Dis_HintNone;
14457
14458 /* At least this is simple on ARM64: insns are all 4 bytes long, and
14459 4-aligned. So just fish the whole thing out of memory right now
14460 and have done. */
14461 UInt insn = getUIntLittleEndianly( guest_instr );
14462
14463 if (0) vex_printf("insn: 0x%x\n", insn);
14464
14465 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
14466
14467 vassert(0 == (guest_PC_curr_instr & 3ULL));
14468
14469 /* ----------------------------------------------------------- */
14470
14471 /* Spot "Special" instructions (see comment at top of file). */
14472 {
14473 const UChar* code = guest_instr;
14474 /* Spot the 16-byte preamble:
14475 93CC0D8C ror x12, x12, #3
14476 93CC358C ror x12, x12, #13
14477 93CCCD8C ror x12, x12, #51
14478 93CCF58C ror x12, x12, #61
14479 */
14480 UInt word1 = 0x93CC0D8C;
14481 UInt word2 = 0x93CC358C;
14482 UInt word3 = 0x93CCCD8C;
14483 UInt word4 = 0x93CCF58C;
14484 if (getUIntLittleEndianly(code+ 0) == word1 &&
14485 getUIntLittleEndianly(code+ 4) == word2 &&
14486 getUIntLittleEndianly(code+ 8) == word3 &&
14487 getUIntLittleEndianly(code+12) == word4) {
14488 /* Got a "Special" instruction preamble. Which one is it? */
14489 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
14490 /* orr x10,x10,x10 */) {
14491 /* X3 = client_request ( X4 ) */
14492 DIP("x3 = client_request ( x4 )\n");
14493 putPC(mkU64( guest_PC_curr_instr + 20 ));
14494 dres->jk_StopHere = Ijk_ClientReq;
14495 dres->whatNext = Dis_StopHere;
14496 return True;
14497 }
14498 else
14499 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
14500 /* orr x11,x11,x11 */) {
14501 /* X3 = guest_NRADDR */
14502 DIP("x3 = guest_NRADDR\n");
14503 dres->len = 20;
14504 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
14505 return True;
14506 }
14507 else
14508 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
14509 /* orr x12,x12,x12 */) {
14510 /* branch-and-link-to-noredir X8 */
14511 DIP("branch-and-link-to-noredir x8\n");
14512 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
14513 putPC(getIReg64orZR(8));
14514 dres->jk_StopHere = Ijk_NoRedir;
14515 dres->whatNext = Dis_StopHere;
14516 return True;
14517 }
14518 else
14519 if (getUIntLittleEndianly(code+16) == 0xAA090129
14520 /* orr x9,x9,x9 */) {
14521 /* IR injection */
14522 DIP("IR injection\n");
14523 vex_inject_ir(irsb, Iend_LE);
14524          // Invalidate the current insn. The reason is that the IR we're
14525          // injecting here can change, in which case the translation has
14526          // to be redone. For ease of handling, we simply invalidate all
14527          // the time.
14528 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
14529 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
14530 putPC(mkU64( guest_PC_curr_instr + 20 ));
14531 dres->whatNext = Dis_StopHere;
14532 dres->jk_StopHere = Ijk_InvalICache;
14533 return True;
14534 }
14535 /* We don't know what it is. */
14536 return False;
14537 /*NOTREACHED*/
14538 }
14539 }
14540
14541 /* ----------------------------------------------------------- */
14542
14543 /* Main ARM64 instruction decoder starts here. */
14544
14545 Bool ok = False;
14546
14547 /* insn[28:25] determines the top-level grouping, so let's start
14548 off with that.
14549
14550 For all of these dis_ARM64_ functions, we pass *dres with the
14551 normal default results "insn OK, 4 bytes long, keep decoding" so
14552 they don't need to change it. However, decodes of control-flow
14553 insns may cause *dres to change.
14554 */
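   /* For example, an add-immediate such as ADD X0, X0, #1 has
      INSN(28,25) == 1000 and so goes to
      dis_ARM64_data_processing_immediate, whereas the scalar FP
      conversions handled earlier (bits 28:24 == 11110) have
      INSN(28,25) == 1111 and end up in dis_ARM64_simd_and_fp. */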
14555 switch (INSN(28,25)) {
14556 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
14557 // Data processing - immediate
14558 ok = dis_ARM64_data_processing_immediate(dres, insn);
14559 break;
14560 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
14561 // Branch, exception generation and system instructions
14562 ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
14563 break;
14564 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
14565 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
14566 // Loads and stores
14567 ok = dis_ARM64_load_store(dres, insn, abiinfo);
14568 break;
14569 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
14570 // Data processing - register
14571 ok = dis_ARM64_data_processing_register(dres, insn);
14572 break;
14573 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
14574 // Data processing - SIMD and floating point
14575 ok = dis_ARM64_simd_and_fp(dres, insn);
14576 break;
14577 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
14578 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
14579 // UNALLOCATED
14580 break;
14581 default:
14582 vassert(0); /* Can't happen */
14583 }
14584
14585 /* If the next-level down decoders failed, make sure |dres| didn't
14586 get changed. */
14587 if (!ok) {
14588 vassert(dres->whatNext == Dis_Continue);
14589 vassert(dres->len == 4);
14590 vassert(dres->continueAt == 0);
14591 vassert(dres->jk_StopHere == Ijk_INVALID);
14592 }
14593
14594 return ok;
14595
14596 # undef INSN
14597 }
14598
14599
14600 /*------------------------------------------------------------*/
14601 /*--- Top-level fn ---*/
14602 /*------------------------------------------------------------*/
14603
14604 /* Disassemble a single instruction into IR. The instruction
14605 is located in host memory at &guest_code[delta]. */
14606
14607 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
14608 Bool (*resteerOkFn) ( void*, Addr ),
14609 Bool resteerCisOk,
14610 void* callback_opaque,
14611 const UChar* guest_code_IN,
14612 Long delta_IN,
14613 Addr guest_IP,
14614 VexArch guest_arch,
14615 const VexArchInfo* archinfo,
14616 const VexAbiInfo* abiinfo,
14617 VexEndness host_endness_IN,
14618 Bool sigill_diag_IN )
14619 {
14620 DisResult dres;
14621 vex_bzero(&dres, sizeof(dres));
14622
14623 /* Set globals (see top of this file) */
14624 vassert(guest_arch == VexArchARM64);
14625
14626 irsb = irsb_IN;
14627 host_endness = host_endness_IN;
14628 guest_PC_curr_instr = (Addr64)guest_IP;
14629
14630 /* Sanity checks */
14631 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
14632 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14633 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
14634
14635 /* Try to decode */
14636 Bool ok = disInstr_ARM64_WRK( &dres,
14637 resteerOkFn, resteerCisOk, callback_opaque,
14638 &guest_code_IN[delta_IN],
14639 archinfo, abiinfo );
14640 if (ok) {
14641 /* All decode successes end up here. */
14642 vassert(dres.len == 4 || dres.len == 20);
14643 switch (dres.whatNext) {
14644 case Dis_Continue:
14645 putPC( mkU64(dres.len + guest_PC_curr_instr) );
14646 break;
14647 case Dis_ResteerU:
14648 case Dis_ResteerC:
14649 putPC(mkU64(dres.continueAt));
14650 break;
14651 case Dis_StopHere:
14652 break;
14653 default:
14654 vassert(0);
14655 }
14656 DIP("\n");
14657 } else {
14658 /* All decode failures end up here. */
14659 if (sigill_diag_IN) {
14660 Int i, j;
14661 UChar buf[64];
14662 UInt insn
14663 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
14664 vex_bzero(buf, sizeof(buf));
14665 for (i = j = 0; i < 32; i++) {
14666 if (i > 0) {
14667 if ((i & 7) == 0) buf[j++] = ' ';
14668 else if ((i & 3) == 0) buf[j++] = '\'';
14669 }
14670 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14671 }
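         /* buf now holds the instruction in binary, grouped for
            readability; e.g. a (hypothetical) undecodable word
            0xDEADC0DE would be shown as
            "1101'1110 1010'1101 1100'0000 1101'1110". */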
14672 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14673 vex_printf("disInstr(arm64): %s\n", buf);
14674 }
14675
14676 /* Tell the dispatcher that this insn cannot be decoded, and so
14677 has not been executed, and (is currently) the next to be
14678 executed. PC should be up-to-date since it is made so at the
14679 start of each insn, but nevertheless be paranoid and update
14680 it again right now. */
14681 putPC( mkU64(guest_PC_curr_instr) );
14682 dres.len = 0;
14683 dres.whatNext = Dis_StopHere;
14684 dres.jk_StopHere = Ijk_NoDecode;
14685 dres.continueAt = 0;
14686 }
14687 return dres;
14688 }
14689
14690
14691 /*--------------------------------------------------------------------*/
14692 /*--- end guest_arm64_toIR.c ---*/
14693 /*--------------------------------------------------------------------*/
14694