1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_basictypes.h"
32 #include "libvex.h"
33 #include "libvex_trc_values.h"
34
35 #include "main_util.h"
36 #include "host_generic_regs.h"
37 #include "host_arm64_defs.h"
38
39
40 /* --------- Registers. --------- */
41
42 /* The usual HReg abstraction. We use the following classes only:
43 X regs (64 bit int)
44 D regs (64 bit float, also used for 32 bit float)
45 Q regs (128 bit vector)
46 */
47
48 const RRegUniverse* getRRegUniverse_ARM64 ( void )
49 {
50 /* The real-register universe is a big constant, so we just want to
51 initialise it once. */
52 static RRegUniverse rRegUniverse_ARM64;
53 static Bool rRegUniverse_ARM64_initted = False;
54
55 /* Handy shorthand, nothing more */
56 RRegUniverse* ru = &rRegUniverse_ARM64;
57
58 /* This isn't thread-safe. Sigh. */
59 if (LIKELY(rRegUniverse_ARM64_initted))
60 return ru;
61
62 RRegUniverse__init(ru);
63
64 /* Add the registers. The initial segment of this array must be
65 those available for allocation by reg-alloc, and those that
66 follow are not available for allocation. */
67
68 ru->regs[ru->size++] = hregARM64_X22();
69 ru->regs[ru->size++] = hregARM64_X23();
70 ru->regs[ru->size++] = hregARM64_X24();
71 ru->regs[ru->size++] = hregARM64_X25();
72 ru->regs[ru->size++] = hregARM64_X26();
73 ru->regs[ru->size++] = hregARM64_X27();
74 ru->regs[ru->size++] = hregARM64_X28();
75
76 ru->regs[ru->size++] = hregARM64_X0();
77 ru->regs[ru->size++] = hregARM64_X1();
78 ru->regs[ru->size++] = hregARM64_X2();
79 ru->regs[ru->size++] = hregARM64_X3();
80 ru->regs[ru->size++] = hregARM64_X4();
81 ru->regs[ru->size++] = hregARM64_X5();
82 ru->regs[ru->size++] = hregARM64_X6();
83 ru->regs[ru->size++] = hregARM64_X7();
84 // X8 is used as a ProfInc temporary, not available to regalloc.
85 // X9 is a chaining/spill temporary, not available to regalloc.
86
87 // Do we really need all these?
88 //ru->regs[ru->size++] = hregARM64_X10();
89 //ru->regs[ru->size++] = hregARM64_X11();
90 //ru->regs[ru->size++] = hregARM64_X12();
91 //ru->regs[ru->size++] = hregARM64_X13();
92 //ru->regs[ru->size++] = hregARM64_X14();
93 //ru->regs[ru->size++] = hregARM64_X15();
94 // X21 is the guest state pointer, not available to regalloc.
95
96 // vector regs. Unfortunately not callee-saved.
97 ru->regs[ru->size++] = hregARM64_Q16();
98 ru->regs[ru->size++] = hregARM64_Q17();
99 ru->regs[ru->size++] = hregARM64_Q18();
100 ru->regs[ru->size++] = hregARM64_Q19();
101 ru->regs[ru->size++] = hregARM64_Q20();
102
103 // F64 regs, all of which are callee-saved
104 ru->regs[ru->size++] = hregARM64_D8();
105 ru->regs[ru->size++] = hregARM64_D9();
106 ru->regs[ru->size++] = hregARM64_D10();
107 ru->regs[ru->size++] = hregARM64_D11();
108 ru->regs[ru->size++] = hregARM64_D12();
109 ru->regs[ru->size++] = hregARM64_D13();
110
111 ru->allocable = ru->size;
112 /* And other regs, not available to the allocator. */
113
114 // unavail: x21 as GSP
115 // x8 is used as a ProfInc temporary
116 // x9 is used as a spill/reload/chaining/call temporary
117 // x30 as LR
118 // x31 because dealing with the SP-vs-ZR overloading is too
119 // confusing, and we don't need to do so, so let's just avoid
120 // the problem
121 //
122 // Currently, we have 15 allocatable integer registers:
123 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
124 //
125 // Hence for the allocatable integer registers we have:
126 //
127 // callee-saved: 22 23 24 25 26 27 28
128 // caller-saved: 0 1 2 3 4 5 6 7
129 //
130 // If the set of available registers changes or if the e/r status
131 // changes, be sure to re-check/sync the definition of
132 // getRegUsage for ARM64Instr_Call too.
133
134 ru->regs[ru->size++] = hregARM64_X8();
135 ru->regs[ru->size++] = hregARM64_X9();
136 ru->regs[ru->size++] = hregARM64_X21();
137
138 rRegUniverse_ARM64_initted = True;
139
140 RRegUniverse__check_is_sane(ru);
141 return ru;
142 }
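
/* Illustrative only (not part of the build): a minimal sketch of how a
   client of this module might walk the universe returned above, assuming
   the hypothetical helper name example_dump_universe.  Entries below
   ru->allocable are the registers the allocator may hand out; the trailing
   entries (x8, x9, x21) are only described so the rest of the backend can
   refer to them. */
#if 0
static void example_dump_universe ( void )
{
   const RRegUniverse* ru = getRRegUniverse_ARM64();
   UInt i;
   for (i = 0; i < ru->allocable; i++) {
      /* prints x0..x7, x22..x28, q16..q20, d8..d13 */
      ppHRegARM64(ru->regs[i]);
      vex_printf("\n");
   }
}
#endif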
143
144
145 void ppHRegARM64 ( HReg reg ) {
146 Int r;
147 /* Be generic for all virtual regs. */
148 if (hregIsVirtual(reg)) {
149 ppHReg(reg);
150 return;
151 }
152 /* But specific for real regs. */
153 switch (hregClass(reg)) {
154 case HRcInt64:
155 r = hregEncoding(reg);
156 vassert(r >= 0 && r < 31);
157 vex_printf("x%d", r);
158 return;
159 case HRcFlt64:
160 r = hregEncoding(reg);
161 vassert(r >= 0 && r < 32);
162 vex_printf("d%d", r);
163 return;
164 case HRcVec128:
165 r = hregEncoding(reg);
166 vassert(r >= 0 && r < 32);
167 vex_printf("q%d", r);
168 return;
169 default:
170 vpanic("ppHRegARM64");
171 }
172 }
173
174 static void ppHRegARM64asSreg ( HReg reg ) {
175 ppHRegARM64(reg);
176 vex_printf("(S-reg)");
177 }
178
179 static void ppHRegARM64asHreg ( HReg reg ) {
180 ppHRegARM64(reg);
181 vex_printf("(H-reg)");
182 }
183
184
185 /* --------- Condition codes, ARM64 encoding. --------- */
186
187 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
188 switch (cond) {
189 case ARM64cc_EQ: return "eq";
190 case ARM64cc_NE: return "ne";
191 case ARM64cc_CS: return "cs";
192 case ARM64cc_CC: return "cc";
193 case ARM64cc_MI: return "mi";
194 case ARM64cc_PL: return "pl";
195 case ARM64cc_VS: return "vs";
196 case ARM64cc_VC: return "vc";
197 case ARM64cc_HI: return "hi";
198 case ARM64cc_LS: return "ls";
199 case ARM64cc_GE: return "ge";
200 case ARM64cc_LT: return "lt";
201 case ARM64cc_GT: return "gt";
202 case ARM64cc_LE: return "le";
203 case ARM64cc_AL: return "al"; // default
204 case ARM64cc_NV: return "nv";
205 default: vpanic("showARM64CondCode");
206 }
207 }
208
209
210 /* --------- Memory address expressions (amodes). --------- */
211
212 ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
213 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
214 am->tag = ARM64am_RI9;
215 am->ARM64am.RI9.reg = reg;
216 am->ARM64am.RI9.simm9 = simm9;
217 vassert(-256 <= simm9 && simm9 <= 255);
218 return am;
219 }
220
221 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
222 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
223 am->tag = ARM64am_RI12;
224 am->ARM64am.RI12.reg = reg;
225 am->ARM64am.RI12.uimm12 = uimm12;
226 am->ARM64am.RI12.szB = szB;
227 vassert(uimm12 >= 0 && uimm12 <= 4095);
228 switch (szB) {
229 case 1: case 2: case 4: case 8: break;
230 default: vassert(0);
231 }
232 return am;
233 }
234
235 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
236 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
237 am->tag = ARM64am_RR;
238 am->ARM64am.RR.base = base;
239 am->ARM64am.RR.index = index;
240 return am;
241 }
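
/* A minimal, not-compiled sketch of the three addressing forms, using x21
   (the baseblock/guest state pointer) as the base register.  Note that in
   the RI12 form the stored uimm12 is in units of szB, so the byte offset
   denoted is szB * uimm12, exactly as ppARM64AMode prints it. */
#if 0
static void example_amodes ( void )
{
   ARM64AMode* a1 = ARM64AMode_RI9 (hregARM64_X21(), -8);     /* -8(x21)  */
   ARM64AMode* a2 = ARM64AMode_RI12(hregARM64_X21(), 16, 8);  /* 128(x21) */
   ARM64AMode* a3 = ARM64AMode_RR  (hregARM64_X21(), hregARM64_X9()); /* (x21,x9) */
   (void)a1; (void)a2; (void)a3;
}
#endif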
242
243 static void ppARM64AMode ( ARM64AMode* am ) {
244 switch (am->tag) {
245 case ARM64am_RI9:
246 vex_printf("%d(", am->ARM64am.RI9.simm9);
247 ppHRegARM64(am->ARM64am.RI9.reg);
248 vex_printf(")");
249 break;
250 case ARM64am_RI12:
251 vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
252 * (UInt)am->ARM64am.RI12.uimm12);
253 ppHRegARM64(am->ARM64am.RI12.reg);
254 vex_printf(")");
255 break;
256 case ARM64am_RR:
257 vex_printf("(");
258 ppHRegARM64(am->ARM64am.RR.base);
259 vex_printf(",");
260 ppHRegARM64(am->ARM64am.RR.index);
261 vex_printf(")");
262 break;
263 default:
264 vassert(0);
265 }
266 }
267
268 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
269 switch (am->tag) {
270 case ARM64am_RI9:
271 addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
272 return;
273 case ARM64am_RI12:
274 addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
275 return;
276 case ARM64am_RR:
277 addHRegUse(u, HRmRead, am->ARM64am.RR.base);
278 addHRegUse(u, HRmRead, am->ARM64am.RR.index);
279 return;
280 default:
281 vpanic("addRegUsage_ARM64Amode");
282 }
283 }
284
285 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
286 switch (am->tag) {
287 case ARM64am_RI9:
288 am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
289 return;
290 case ARM64am_RI12:
291 am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
292 return;
293 case ARM64am_RR:
294 am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
295 am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
296 return;
297 default:
298 vpanic("mapRegs_ARM64Amode");
299 }
300 }
301
302
303 /* --------- Reg or uimm12<<{0,12} operands --------- */
304
305 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
306 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
307 riA->tag = ARM64riA_I12;
308 riA->ARM64riA.I12.imm12 = imm12;
309 riA->ARM64riA.I12.shift = shift;
310 vassert(imm12 < 4096);
311 vassert(shift == 0 || shift == 12);
312 return riA;
313 }
314 ARM64RIA* ARM64RIA_R ( HReg reg ) {
315 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
316 riA->tag = ARM64riA_R;
317 riA->ARM64riA.R.reg = reg;
318 return riA;
319 }
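
/* Not built; just a sketch of the two RIA forms.  For the I12 case the
   value denoted is imm12 << shift, so (1, 12) stands for the constant
   #4096, which is how ppARM64RIA below prints it.  The parameter
   someXReg is a hypothetical X-class register supplied by the caller. */
#if 0
static void example_rias ( HReg someXReg )
{
   ARM64RIA* r1 = ARM64RIA_I12(1, 12);    /* #4096 */
   ARM64RIA* r2 = ARM64RIA_R(someXReg);   /* plain register operand */
   (void)r1; (void)r2;
}
#endif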
320
321 static void ppARM64RIA ( ARM64RIA* riA ) {
322 switch (riA->tag) {
323 case ARM64riA_I12:
324 vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
325 << riA->ARM64riA.I12.shift));
326 break;
327 case ARM64riA_R:
328 ppHRegARM64(riA->ARM64riA.R.reg);
329 break;
330 default:
331 vassert(0);
332 }
333 }
334
335 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
336 switch (riA->tag) {
337 case ARM64riA_I12:
338 return;
339 case ARM64riA_R:
340 addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
341 return;
342 default:
343 vpanic("addRegUsage_ARM64RIA");
344 }
345 }
346
347 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
348 switch (riA->tag) {
349 case ARM64riA_I12:
350 return;
351 case ARM64riA_R:
352 riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
353 return;
354 default:
355 vpanic("mapRegs_ARM64RIA");
356 }
357 }
358
359
360 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
361
362 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
363 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
364 riL->tag = ARM64riL_I13;
365 riL->ARM64riL.I13.bitN = bitN;
366 riL->ARM64riL.I13.immR = immR;
367 riL->ARM64riL.I13.immS = immS;
368 vassert(bitN < 2);
369 vassert(immR < 64);
370 vassert(immS < 64);
371 return riL;
372 }
373 ARM64RIL* ARM64RIL_R ( HReg reg ) {
374 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
375 riL->tag = ARM64riL_R;
376 riL->ARM64riL.R.reg = reg;
377 return riL;
378 }
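
/* Not built; a sketch of the I13 form.  The (bitN, immR, immS) triple is
   the standard A64 "bitmask immediate" encoding: for a 64-bit operation
   bitN=1, immS encodes (number of consecutive ones - 1) and immR the
   rotation applied to that run.  So, assuming that reading of the
   encoding, (1, 0, 7) denotes the 64-bit constant 0xFF.  someXReg is a
   hypothetical X-class register supplied by the caller. */
#if 0
static void example_rils ( HReg someXReg )
{
   ARM64RIL* mask_ff = ARM64RIL_I13(1, 0, 7);   /* #0xFF, per the scheme above */
   ARM64RIL* regForm = ARM64RIL_R(someXReg);
   (void)mask_ff; (void)regForm;
}
#endif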
379
380 static void ppARM64RIL ( ARM64RIL* riL ) {
381 switch (riL->tag) {
382 case ARM64riL_I13:
383 vex_printf("#nrs(%u,%u,%u)",
384 (UInt)riL->ARM64riL.I13.bitN,
385 (UInt)riL->ARM64riL.I13.immR,
386 (UInt)riL->ARM64riL.I13.immS);
387 break;
388 case ARM64riL_R:
389 ppHRegARM64(riL->ARM64riL.R.reg);
390 break;
391 default:
392 vassert(0);
393 }
394 }
395
396 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
397 switch (riL->tag) {
398 case ARM64riL_I13:
399 return;
400 case ARM64riL_R:
401 addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
402 return;
403 default:
404 vpanic("addRegUsage_ARM64RIL");
405 }
406 }
407
408 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
409 switch (riL->tag) {
410 case ARM64riL_I13:
411 return;
412 case ARM64riL_R:
413 riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
414 return;
415 default:
416 vpanic("mapRegs_ARM64RIL");
417 }
418 }
419
420
421 /* --------------- Reg or uimm6 operands --------------- */
422
423 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
424 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
425 ri6->tag = ARM64ri6_I6;
426 ri6->ARM64ri6.I6.imm6 = imm6;
427 vassert(imm6 > 0 && imm6 < 64);
428 return ri6;
429 }
430 ARM64RI6* ARM64RI6_R ( HReg reg ) {
431 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
432 ri6->tag = ARM64ri6_R;
433 ri6->ARM64ri6.R.reg = reg;
434 return ri6;
435 }
436
437 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
438 switch (ri6->tag) {
439 case ARM64ri6_I6:
440 vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
441 break;
442 case ARM64ri6_R:
443 ppHRegARM64(ri6->ARM64ri6.R.reg);
444 break;
445 default:
446 vassert(0);
447 }
448 }
449
450 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
451 switch (ri6->tag) {
452 case ARM64ri6_I6:
453 return;
454 case ARM64ri6_R:
455 addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
456 return;
457 default:
458 vpanic("addRegUsage_ARM64RI6");
459 }
460 }
461
462 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
463 switch (ri6->tag) {
464 case ARM64ri6_I6:
465 return;
466 case ARM64ri6_R:
467 ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
468 return;
469 default:
470 vpanic("mapRegs_ARM64RI6");
471 }
472 }
473
474
475 /* --------- Instructions. --------- */
476
477 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
478 switch (op) {
479 case ARM64lo_AND: return "and";
480 case ARM64lo_OR: return "orr";
481 case ARM64lo_XOR: return "eor";
482 default: vpanic("showARM64LogicOp");
483 }
484 }
485
486 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
487 switch (op) {
488 case ARM64sh_SHL: return "lsl";
489 case ARM64sh_SHR: return "lsr";
490 case ARM64sh_SAR: return "asr";
491 default: vpanic("showARM64ShiftOp");
492 }
493 }
494
495 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
496 switch (op) {
497 case ARM64un_NEG: return "neg";
498 case ARM64un_NOT: return "not";
499 case ARM64un_CLZ: return "clz";
500 default: vpanic("showARM64UnaryOp");
501 }
502 }
503
504 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
505 switch (op) {
506 case ARM64mul_PLAIN: return "mul ";
507 case ARM64mul_ZX: return "umulh";
508 case ARM64mul_SX: return "smulh";
509 default: vpanic("showARM64MulOp");
510 }
511 }
512
513 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
514 /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
515 ARM64CvtOp op ) {
516 switch (op) {
517 case ARM64cvt_F32_I32S:
518 *syn = 's'; *fszB = 4; *iszB = 4; break;
519 case ARM64cvt_F64_I32S:
520 *syn = 's'; *fszB = 8; *iszB = 4; break;
521 case ARM64cvt_F32_I64S:
522 *syn = 's'; *fszB = 4; *iszB = 8; break;
523 case ARM64cvt_F64_I64S:
524 *syn = 's'; *fszB = 8; *iszB = 8; break;
525 case ARM64cvt_F32_I32U:
526 *syn = 'u'; *fszB = 4; *iszB = 4; break;
527 case ARM64cvt_F64_I32U:
528 *syn = 'u'; *fszB = 8; *iszB = 4; break;
529 case ARM64cvt_F32_I64U:
530 *syn = 'u'; *fszB = 4; *iszB = 8; break;
531 case ARM64cvt_F64_I64U:
532 *syn = 'u'; *fszB = 8; *iszB = 8; break;
533 default:
534 vpanic("characteriseARM64CvtOp");
535 }
536 }
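
/* Illustrative only: what the characterisation above yields for one op.
   The printer further below uses these outputs to build the convert
   mnemonic and to pick W/X and S/D register names. */
#if 0
static void example_characterise ( void )
{
   HChar syn;  UInt fszB, iszB;
   characteriseARM64CvtOp(&syn, &fszB, &iszB, ARM64cvt_F64_I32U);
   /* now syn == 'u', fszB == 8 (D register), iszB == 4 (W register) */
   (void)syn; (void)fszB; (void)iszB;
}
#endif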
537
538 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
539 switch (op) {
540 case ARM64fpb_ADD: return "add";
541 case ARM64fpb_SUB: return "sub";
542 case ARM64fpb_MUL: return "mul";
543 case ARM64fpb_DIV: return "div";
544 default: vpanic("showARM64FpBinOp");
545 }
546 }
547
548 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
549 switch (op) {
550 case ARM64fpu_NEG: return "neg ";
551 case ARM64fpu_ABS: return "abs ";
552 case ARM64fpu_SQRT: return "sqrt ";
553 case ARM64fpu_RINT: return "rinti";
554 case ARM64fpu_RECPX: return "recpx";
555 default: vpanic("showARM64FpUnaryOp");
556 }
557 }
558
559 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
560 /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
561 switch (op) {
562 case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
563 case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
564 case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
565 case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
566 case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
567 case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
568 case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
569 case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
570 case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
571 case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
572 case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
573 case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
574 case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
575 case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
576 case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
577 case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
578 case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
579 case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
580 case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
581 case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
582 case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
583 case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
584 case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
585 case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
586 case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
587 case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
588 case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
589 case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
590 case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
591 case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
592 case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
593 case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
594 case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
595 case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
596 case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
597 case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
598 case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
599 case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
600 case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
601 case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
602 case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
603 case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
604 case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
605 case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
606 case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
607 case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
608 case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
609 case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
610 case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
611 case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
612 case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
613 case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
614 case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
615 case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
616 case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
617 case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
618 case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
619 case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
620 case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
621 case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
622 case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
623 case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
624 case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
625 case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
626 case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
627 case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
628 case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
629 case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
630 case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
631 case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
632 case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
633 case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
634 case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
635 case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
636 case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
637 case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
638 case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
639 case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
640 case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
641 case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
642 case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
643 case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
644 case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
645 case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
646 case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
647 case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
648 case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
649 case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
650 case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
651 case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
652 case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
653 case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
654 case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
655 case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
656 case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
657 case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
658 case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
659 case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
660 case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
661 case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
662 case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
663 case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
664 case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
665 case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
666 case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
667 case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
668 case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
669 case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
670 case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
671 case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
672 case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
673 case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
674 case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
675 case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
676 case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
677 case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
678 case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
679 case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
680 case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
681 case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
682 case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
683 case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
684 case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
685 case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
686 case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
687 case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
688 case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
689 case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
690 case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
691 case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
692 case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
693 case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
694 case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
695 case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
696 case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
697 case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
698 case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
699 default: vpanic("showARM64VecBinOp");
700 }
701 }
702
703 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
704 /*OUT*/const HChar** ar,
705 ARM64VecModifyOp op ) {
706 switch (op) {
707 case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
708 case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
709 case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
710 case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
711 case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
712 case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
713 case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
714 case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
715 default: vpanic("showARM64VecModifyOp");
716 }
717 }
718
719 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
720 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
721 {
722 switch (op) {
723 case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
724 case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
725 case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
726 case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
727 case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
728 case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
729 case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
730 case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
731 case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
732 case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
733 case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
734 case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
735 case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
736 case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
737 case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
738 case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
739 case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
740 case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
741 case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
742 case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
743 case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
744 case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
745 case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
746 case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
747 case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
748 case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
749 case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
750 case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
751 case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
752 case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return;
753 case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return;
754 default: vpanic("showARM64VecUnaryOp");
755 }
756 }
757
758 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
759 /*OUT*/const HChar** ar,
760 ARM64VecShiftImmOp op )
761 {
762 switch (op) {
763 case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
764 case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
765 case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
766 case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
767 case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
768 case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
769 case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
770 case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
771 case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
772 case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
773 case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
774 case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
775 case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
776 case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
777 case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
778 case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
779 case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
780 case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
781 case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
782 case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
783 case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
784 case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
785 case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
786 case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
787 case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
788 case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
789 case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
790 case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
791 case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
792 case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
793 case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
794 case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
795 case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
796 case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
797 case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
798 case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
799 case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
800 case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
801 case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
802 case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
803 case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
804 case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
805 default: vpanic("showARM64VecShiftImmOp");
806 }
807 }
808
809 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
810 switch (op) {
811 case ARM64vecna_XTN: return "xtn ";
812 case ARM64vecna_SQXTN: return "sqxtn ";
813 case ARM64vecna_UQXTN: return "uqxtn ";
814 case ARM64vecna_SQXTUN: return "sqxtun";
815 default: vpanic("showARM64VecNarrowOp");
816 }
817 }
818
819 ARM64Instr* ARM64Instr_Arith ( HReg dst,
820 HReg argL, ARM64RIA* argR, Bool isAdd ) {
821 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
822 i->tag = ARM64in_Arith;
823 i->ARM64in.Arith.dst = dst;
824 i->ARM64in.Arith.argL = argL;
825 i->ARM64in.Arith.argR = argR;
826 i->ARM64in.Arith.isAdd = isAdd;
827 return i;
828 }
829 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
830 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
831 i->tag = ARM64in_Cmp;
832 i->ARM64in.Cmp.argL = argL;
833 i->ARM64in.Cmp.argR = argR;
834 i->ARM64in.Cmp.is64 = is64;
835 return i;
836 }
837 ARM64Instr* ARM64Instr_Logic ( HReg dst,
838 HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
839 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
840 i->tag = ARM64in_Logic;
841 i->ARM64in.Logic.dst = dst;
842 i->ARM64in.Logic.argL = argL;
843 i->ARM64in.Logic.argR = argR;
844 i->ARM64in.Logic.op = op;
845 return i;
846 }
847 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
848 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
849 i->tag = ARM64in_Test;
850 i->ARM64in.Test.argL = argL;
851 i->ARM64in.Test.argR = argR;
852 return i;
853 }
854 ARM64Instr* ARM64Instr_Shift ( HReg dst,
855 HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
856 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
857 i->tag = ARM64in_Shift;
858 i->ARM64in.Shift.dst = dst;
859 i->ARM64in.Shift.argL = argL;
860 i->ARM64in.Shift.argR = argR;
861 i->ARM64in.Shift.op = op;
862 return i;
863 }
864 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
865 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
866 i->tag = ARM64in_Unary;
867 i->ARM64in.Unary.dst = dst;
868 i->ARM64in.Unary.src = src;
869 i->ARM64in.Unary.op = op;
870 return i;
871 }
872 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
873 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
874 i->tag = ARM64in_MovI;
875 i->ARM64in.MovI.dst = dst;
876 i->ARM64in.MovI.src = src;
877 vassert(hregClass(src) == HRcInt64);
878 vassert(hregClass(dst) == HRcInt64);
879 return i;
880 }
881 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
882 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
883 i->tag = ARM64in_Imm64;
884 i->ARM64in.Imm64.dst = dst;
885 i->ARM64in.Imm64.imm64 = imm64;
886 return i;
887 }
888 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
889 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
890 i->tag = ARM64in_LdSt64;
891 i->ARM64in.LdSt64.isLoad = isLoad;
892 i->ARM64in.LdSt64.rD = rD;
893 i->ARM64in.LdSt64.amode = amode;
894 return i;
895 }
896 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
897 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
898 i->tag = ARM64in_LdSt32;
899 i->ARM64in.LdSt32.isLoad = isLoad;
900 i->ARM64in.LdSt32.rD = rD;
901 i->ARM64in.LdSt32.amode = amode;
902 return i;
903 }
904 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
905 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
906 i->tag = ARM64in_LdSt16;
907 i->ARM64in.LdSt16.isLoad = isLoad;
908 i->ARM64in.LdSt16.rD = rD;
909 i->ARM64in.LdSt16.amode = amode;
910 return i;
911 }
912 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
913 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
914 i->tag = ARM64in_LdSt8;
915 i->ARM64in.LdSt8.isLoad = isLoad;
916 i->ARM64in.LdSt8.rD = rD;
917 i->ARM64in.LdSt8.amode = amode;
918 return i;
919 }
920 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
921 ARM64CondCode cond, Bool toFastEP ) {
922 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
923 i->tag = ARM64in_XDirect;
924 i->ARM64in.XDirect.dstGA = dstGA;
925 i->ARM64in.XDirect.amPC = amPC;
926 i->ARM64in.XDirect.cond = cond;
927 i->ARM64in.XDirect.toFastEP = toFastEP;
928 return i;
929 }
930 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
931 ARM64CondCode cond ) {
932 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
933 i->tag = ARM64in_XIndir;
934 i->ARM64in.XIndir.dstGA = dstGA;
935 i->ARM64in.XIndir.amPC = amPC;
936 i->ARM64in.XIndir.cond = cond;
937 return i;
938 }
939 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
940 ARM64CondCode cond, IRJumpKind jk ) {
941 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
942 i->tag = ARM64in_XAssisted;
943 i->ARM64in.XAssisted.dstGA = dstGA;
944 i->ARM64in.XAssisted.amPC = amPC;
945 i->ARM64in.XAssisted.cond = cond;
946 i->ARM64in.XAssisted.jk = jk;
947 return i;
948 }
949 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
950 ARM64CondCode cond ) {
951 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
952 i->tag = ARM64in_CSel;
953 i->ARM64in.CSel.dst = dst;
954 i->ARM64in.CSel.argL = argL;
955 i->ARM64in.CSel.argR = argR;
956 i->ARM64in.CSel.cond = cond;
957 return i;
958 }
959 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
960 RetLoc rloc ) {
961 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
962 i->tag = ARM64in_Call;
963 i->ARM64in.Call.cond = cond;
964 i->ARM64in.Call.target = target;
965 i->ARM64in.Call.nArgRegs = nArgRegs;
966 i->ARM64in.Call.rloc = rloc;
967 vassert(is_sane_RetLoc(rloc));
968 return i;
969 }
970 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
971 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
972 i->tag = ARM64in_AddToSP;
973 i->ARM64in.AddToSP.simm = simm;
974 vassert(-4096 < simm && simm < 4096);
975 vassert(0 == (simm & 0xF));
976 return i;
977 }
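
/* Not built; a sketch only.  The asserts above keep SP adjustments small
   and 16-byte aligned, since AArch64 expects SP to stay 16-aligned when it
   is used as the base of a memory access. */
#if 0
static void example_sp_adjust ( void )
{
   ARM64Instr* push = ARM64Instr_AddToSP(-16);   /* reserve one 16-byte slot */
   ARM64Instr* pop  = ARM64Instr_AddToSP( 16);   /* and give it back */
   (void)push; (void)pop;
}
#endif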
978 extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
979 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
980 i->tag = ARM64in_FromSP;
981 i->ARM64in.FromSP.dst = dst;
982 return i;
983 }
984 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
985 ARM64MulOp op ) {
986 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
987 i->tag = ARM64in_Mul;
988 i->ARM64in.Mul.dst = dst;
989 i->ARM64in.Mul.argL = argL;
990 i->ARM64in.Mul.argR = argR;
991 i->ARM64in.Mul.op = op;
992 return i;
993 }
994 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
995 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
996 i->tag = ARM64in_LdrEX;
997 i->ARM64in.LdrEX.szB = szB;
998 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
999 return i;
1000 }
1001 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1002 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1003 i->tag = ARM64in_StrEX;
1004 i->ARM64in.StrEX.szB = szB;
1005 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1006 return i;
1007 }
1008 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
1009 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1010 i->tag = ARM64in_CAS;
1011 i->ARM64in.CAS.szB = szB;
1012 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1013 return i;
1014 }
1015 ARM64Instr* ARM64Instr_MFence ( void ) {
1016 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1017 i->tag = ARM64in_MFence;
1018 return i;
1019 }
1020 ARM64Instr* ARM64Instr_ClrEX ( void ) {
1021 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1022 i->tag = ARM64in_ClrEX;
1023 return i;
1024 }
1025 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1026 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1027 i->tag = ARM64in_VLdStH;
1028 i->ARM64in.VLdStH.isLoad = isLoad;
1029 i->ARM64in.VLdStH.hD = sD;
1030 i->ARM64in.VLdStH.rN = rN;
1031 i->ARM64in.VLdStH.uimm12 = uimm12;
1032 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
1033 return i;
1034 }
1035 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1036 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1037 i->tag = ARM64in_VLdStS;
1038 i->ARM64in.VLdStS.isLoad = isLoad;
1039 i->ARM64in.VLdStS.sD = sD;
1040 i->ARM64in.VLdStS.rN = rN;
1041 i->ARM64in.VLdStS.uimm12 = uimm12;
1042 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1043 return i;
1044 }
1045 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1046 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1047 i->tag = ARM64in_VLdStD;
1048 i->ARM64in.VLdStD.isLoad = isLoad;
1049 i->ARM64in.VLdStD.dD = dD;
1050 i->ARM64in.VLdStD.rN = rN;
1051 i->ARM64in.VLdStD.uimm12 = uimm12;
1052 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1053 return i;
1054 }
1055 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1056 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1057 i->tag = ARM64in_VLdStQ;
1058 i->ARM64in.VLdStQ.isLoad = isLoad;
1059 i->ARM64in.VLdStQ.rQ = rQ;
1060 i->ARM64in.VLdStQ.rN = rN;
1061 return i;
1062 }
1063 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1064 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1065 i->tag = ARM64in_VCvtI2F;
1066 i->ARM64in.VCvtI2F.how = how;
1067 i->ARM64in.VCvtI2F.rD = rD;
1068 i->ARM64in.VCvtI2F.rS = rS;
1069 return i;
1070 }
1071 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1072 UChar armRM ) {
1073 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1074 i->tag = ARM64in_VCvtF2I;
1075 i->ARM64in.VCvtF2I.how = how;
1076 i->ARM64in.VCvtF2I.rD = rD;
1077 i->ARM64in.VCvtF2I.rS = rS;
1078 i->ARM64in.VCvtF2I.armRM = armRM;
1079 vassert(armRM <= 3);
1080 return i;
1081 }
1082 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1083 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1084 i->tag = ARM64in_VCvtSD;
1085 i->ARM64in.VCvtSD.sToD = sToD;
1086 i->ARM64in.VCvtSD.dst = dst;
1087 i->ARM64in.VCvtSD.src = src;
1088 return i;
1089 }
1090 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
1091 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1092 i->tag = ARM64in_VCvtHS;
1093 i->ARM64in.VCvtHS.hToS = hToS;
1094 i->ARM64in.VCvtHS.dst = dst;
1095 i->ARM64in.VCvtHS.src = src;
1096 return i;
1097 }
1098 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
1099 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1100 i->tag = ARM64in_VCvtHD;
1101 i->ARM64in.VCvtHD.hToD = hToD;
1102 i->ARM64in.VCvtHD.dst = dst;
1103 i->ARM64in.VCvtHD.src = src;
1104 return i;
1105 }
1106 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1107 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1108 i->tag = ARM64in_VUnaryD;
1109 i->ARM64in.VUnaryD.op = op;
1110 i->ARM64in.VUnaryD.dst = dst;
1111 i->ARM64in.VUnaryD.src = src;
1112 return i;
1113 }
1114 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1115 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1116 i->tag = ARM64in_VUnaryS;
1117 i->ARM64in.VUnaryS.op = op;
1118 i->ARM64in.VUnaryS.dst = dst;
1119 i->ARM64in.VUnaryS.src = src;
1120 return i;
1121 }
1122 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1123 HReg dst, HReg argL, HReg argR ) {
1124 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1125 i->tag = ARM64in_VBinD;
1126 i->ARM64in.VBinD.op = op;
1127 i->ARM64in.VBinD.dst = dst;
1128 i->ARM64in.VBinD.argL = argL;
1129 i->ARM64in.VBinD.argR = argR;
1130 return i;
1131 }
1132 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1133 HReg dst, HReg argL, HReg argR ) {
1134 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1135 i->tag = ARM64in_VBinS;
1136 i->ARM64in.VBinS.op = op;
1137 i->ARM64in.VBinS.dst = dst;
1138 i->ARM64in.VBinS.argL = argL;
1139 i->ARM64in.VBinS.argR = argR;
1140 return i;
1141 }
1142 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1143 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1144 i->tag = ARM64in_VCmpD;
1145 i->ARM64in.VCmpD.argL = argL;
1146 i->ARM64in.VCmpD.argR = argR;
1147 return i;
1148 }
1149 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1150 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1151 i->tag = ARM64in_VCmpS;
1152 i->ARM64in.VCmpS.argL = argL;
1153 i->ARM64in.VCmpS.argR = argR;
1154 return i;
1155 }
1156 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
1157 ARM64CondCode cond, Bool isD ) {
1158 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1159 i->tag = ARM64in_VFCSel;
1160 i->ARM64in.VFCSel.dst = dst;
1161 i->ARM64in.VFCSel.argL = argL;
1162 i->ARM64in.VFCSel.argR = argR;
1163 i->ARM64in.VFCSel.cond = cond;
1164 i->ARM64in.VFCSel.isD = isD;
1165 return i;
1166 }
1167 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1168 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1169 i->tag = ARM64in_FPCR;
1170 i->ARM64in.FPCR.toFPCR = toFPCR;
1171 i->ARM64in.FPCR.iReg = iReg;
1172 return i;
1173 }
1174 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
1175 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1176 i->tag = ARM64in_FPSR;
1177 i->ARM64in.FPSR.toFPSR = toFPSR;
1178 i->ARM64in.FPSR.iReg = iReg;
1179 return i;
1180 }
1181 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1182 HReg dst, HReg argL, HReg argR ) {
1183 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1184 i->tag = ARM64in_VBinV;
1185 i->ARM64in.VBinV.op = op;
1186 i->ARM64in.VBinV.dst = dst;
1187 i->ARM64in.VBinV.argL = argL;
1188 i->ARM64in.VBinV.argR = argR;
1189 return i;
1190 }
1191 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
1192 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1193 i->tag = ARM64in_VModifyV;
1194 i->ARM64in.VModifyV.op = op;
1195 i->ARM64in.VModifyV.mod = mod;
1196 i->ARM64in.VModifyV.arg = arg;
1197 return i;
1198 }
1199 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1200 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1201 i->tag = ARM64in_VUnaryV;
1202 i->ARM64in.VUnaryV.op = op;
1203 i->ARM64in.VUnaryV.dst = dst;
1204 i->ARM64in.VUnaryV.arg = arg;
1205 return i;
1206 }
1207 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
1208 UInt dszBlg2, HReg dst, HReg src ) {
1209 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1210 i->tag = ARM64in_VNarrowV;
1211 i->ARM64in.VNarrowV.op = op;
1212 i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1213 i->ARM64in.VNarrowV.dst = dst;
1214 i->ARM64in.VNarrowV.src = src;
1215 vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1216 return i;
1217 }
1218 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
1219 HReg dst, HReg src, UInt amt ) {
1220 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1221 i->tag = ARM64in_VShiftImmV;
1222 i->ARM64in.VShiftImmV.op = op;
1223 i->ARM64in.VShiftImmV.dst = dst;
1224 i->ARM64in.VShiftImmV.src = src;
1225 i->ARM64in.VShiftImmV.amt = amt;
1226 UInt minSh = 0;
1227 UInt maxSh = 0;
1228 switch (op) {
1229 /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1230 For left shifts, the allowed shift amounts are 0 .. lane_size-1.
1231 */
1232 case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
1233 case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
1234 case ARM64vecshi_SQSHRUN2SD:
1235 case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
1236 case ARM64vecshi_SQRSHRUN2SD:
1237 minSh = 1; maxSh = 64; break;
1238 case ARM64vecshi_SHL64x2:
1239 case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
1240 case ARM64vecshi_SQSHLU64x2:
1241 minSh = 0; maxSh = 63; break;
1242 case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
1243 case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
1244 case ARM64vecshi_SQSHRUN4HS:
1245 case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
1246 case ARM64vecshi_SQRSHRUN4HS:
1247 minSh = 1; maxSh = 32; break;
1248 case ARM64vecshi_SHL32x4:
1249 case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
1250 case ARM64vecshi_SQSHLU32x4:
1251 minSh = 0; maxSh = 31; break;
1252 case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
1253 case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
1254 case ARM64vecshi_SQSHRUN8BH:
1255 case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
1256 case ARM64vecshi_SQRSHRUN8BH:
1257 minSh = 1; maxSh = 16; break;
1258 case ARM64vecshi_SHL16x8:
1259 case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
1260 case ARM64vecshi_SQSHLU16x8:
1261 minSh = 0; maxSh = 15; break;
1262 case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
1263 minSh = 1; maxSh = 8; break;
1264 case ARM64vecshi_SHL8x16:
1265 case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
1266 case ARM64vecshi_SQSHLU8x16:
1267 minSh = 0; maxSh = 7; break;
1268 default:
1269 vassert(0);
1270 }
1271 vassert(maxSh > 0);
1272 vassert(amt >= minSh && amt <= maxSh);
1273 return i;
1274 }
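
/* Not built; just illustrates the shift-amount ranges enforced above,
   assuming dst/src are Q (HRcVec128) registers supplied by the caller. */
#if 0
static ARM64Instr* example_vec_shifts ( HReg dst, HReg src )
{
   /* A right shift may be by the full lane width ... */
   (void) ARM64Instr_VShiftImmV(ARM64vecshi_USHR64x2, dst, src, 64);
   /* ... whereas a left shift tops out at lane_size - 1. */
   return ARM64Instr_VShiftImmV(ARM64vecshi_SHL64x2, dst, src, 63);
}
#endif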
1275 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
1276 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1277 i->tag = ARM64in_VExtV;
1278 i->ARM64in.VExtV.dst = dst;
1279 i->ARM64in.VExtV.srcLo = srcLo;
1280 i->ARM64in.VExtV.srcHi = srcHi;
1281 i->ARM64in.VExtV.amtB = amtB;
1282 vassert(amtB >= 1 && amtB <= 15);
1283 return i;
1284 }
1285 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1286 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1287 i->tag = ARM64in_VImmQ;
1288 i->ARM64in.VImmQ.rQ = rQ;
1289 i->ARM64in.VImmQ.imm = imm;
1290 /* Check that this is something that can actually be emitted. */
1291 switch (imm) {
1292 case 0x0000: case 0x0001: case 0x0003:
1293 case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1294 break;
1295 default:
1296 vassert(0);
1297 }
1298 return i;
1299 }
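
/* Not built; a sketch.  The 16-bit imm appears to follow the same
   convention as IR Ico_V128 constants: each bit describes one byte of the
   128-bit value, and only the masks listed in the switch above can be
   turned into actual instructions.  rQ is a hypothetical Q register. */
#if 0
static ARM64Instr* example_vimmq ( HReg rQ )
{
   return ARM64Instr_VImmQ(rQ, 0x00FF);   /* low 8 bytes set, upper 8 clear */
}
#endif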
1300 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1301 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1302 i->tag = ARM64in_VDfromX;
1303 i->ARM64in.VDfromX.rD = rD;
1304 i->ARM64in.VDfromX.rX = rX;
1305 return i;
1306 }
1307 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
1308 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1309 i->tag = ARM64in_VQfromX;
1310 i->ARM64in.VQfromX.rQ = rQ;
1311 i->ARM64in.VQfromX.rXlo = rXlo;
1312 return i;
1313 }
1314 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1315 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1316 i->tag = ARM64in_VQfromXX;
1317 i->ARM64in.VQfromXX.rQ = rQ;
1318 i->ARM64in.VQfromXX.rXhi = rXhi;
1319 i->ARM64in.VQfromXX.rXlo = rXlo;
1320 return i;
1321 }
1322 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1323 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1324 i->tag = ARM64in_VXfromQ;
1325 i->ARM64in.VXfromQ.rX = rX;
1326 i->ARM64in.VXfromQ.rQ = rQ;
1327 i->ARM64in.VXfromQ.laneNo = laneNo;
1328 vassert(laneNo <= 1);
1329 return i;
1330 }
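
/* Not built; a sketch.  laneNo selects which 64-bit half of the Q register
   ends up in the X register (0 = low half, 1 = high half), assuming rX and
   rQ have the obvious register classes. */
#if 0
static ARM64Instr* example_vxfromq ( HReg rX, HReg rQ )
{
   return ARM64Instr_VXfromQ(rX, rQ, 1);   /* rX := upper 64 bits of rQ */
}
#endif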
1331 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
1332 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1333 i->tag = ARM64in_VXfromDorS;
1334 i->ARM64in.VXfromDorS.rX = rX;
1335 i->ARM64in.VXfromDorS.rDorS = rDorS;
1336 i->ARM64in.VXfromDorS.fromD = fromD;
1337 return i;
1338 }
1339 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1340 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1341 i->tag = ARM64in_VMov;
1342 i->ARM64in.VMov.szB = szB;
1343 i->ARM64in.VMov.dst = dst;
1344 i->ARM64in.VMov.src = src;
1345 switch (szB) {
1346 case 16:
1347 vassert(hregClass(src) == HRcVec128);
1348 vassert(hregClass(dst) == HRcVec128);
1349 break;
1350 case 8:
1351 vassert(hregClass(src) == HRcFlt64);
1352 vassert(hregClass(dst) == HRcFlt64);
1353 break;
1354 default:
1355 vpanic("ARM64Instr_VMov");
1356 }
1357 return i;
1358 }
1359 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1360 ARM64AMode* amFailAddr ) {
1361 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1362 i->tag = ARM64in_EvCheck;
1363 i->ARM64in.EvCheck.amCounter = amCounter;
1364 i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1365 return i;
1366 }
1367 ARM64Instr* ARM64Instr_ProfInc ( void ) {
1368 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1369 i->tag = ARM64in_ProfInc;
1370 return i;
1371 }
1372
1373 /* ... */
1374
1375 void ppARM64Instr ( const ARM64Instr* i ) {
1376 switch (i->tag) {
1377 case ARM64in_Arith:
1378 vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1379 ppHRegARM64(i->ARM64in.Arith.dst);
1380 vex_printf(", ");
1381 ppHRegARM64(i->ARM64in.Arith.argL);
1382 vex_printf(", ");
1383 ppARM64RIA(i->ARM64in.Arith.argR);
1384 return;
1385 case ARM64in_Cmp:
1386 vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
1387 ppHRegARM64(i->ARM64in.Cmp.argL);
1388 vex_printf(", ");
1389 ppARM64RIA(i->ARM64in.Cmp.argR);
1390 return;
1391 case ARM64in_Logic:
1392 vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
1393 ppHRegARM64(i->ARM64in.Logic.dst);
1394 vex_printf(", ");
1395 ppHRegARM64(i->ARM64in.Logic.argL);
1396 vex_printf(", ");
1397 ppARM64RIL(i->ARM64in.Logic.argR);
1398 return;
1399 case ARM64in_Test:
1400 vex_printf("tst ");
1401 ppHRegARM64(i->ARM64in.Test.argL);
1402 vex_printf(", ");
1403 ppARM64RIL(i->ARM64in.Test.argR);
1404 return;
1405 case ARM64in_Shift:
1406 vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
1407 ppHRegARM64(i->ARM64in.Shift.dst);
1408 vex_printf(", ");
1409 ppHRegARM64(i->ARM64in.Shift.argL);
1410 vex_printf(", ");
1411 ppARM64RI6(i->ARM64in.Shift.argR);
1412 return;
1413 case ARM64in_Unary:
1414 vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
1415 ppHRegARM64(i->ARM64in.Unary.dst);
1416 vex_printf(", ");
1417 ppHRegARM64(i->ARM64in.Unary.src);
1418 return;
1419 case ARM64in_MovI:
1420 vex_printf("mov ");
1421 ppHRegARM64(i->ARM64in.MovI.dst);
1422 vex_printf(", ");
1423 ppHRegARM64(i->ARM64in.MovI.src);
1424 return;
1425 case ARM64in_Imm64:
1426 vex_printf("imm64 ");
1427 ppHRegARM64(i->ARM64in.Imm64.dst);
1428 vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1429 return;
1430 case ARM64in_LdSt64:
1431 if (i->ARM64in.LdSt64.isLoad) {
1432 vex_printf("ldr ");
1433 ppHRegARM64(i->ARM64in.LdSt64.rD);
1434 vex_printf(", ");
1435 ppARM64AMode(i->ARM64in.LdSt64.amode);
1436 } else {
1437 vex_printf("str ");
1438 ppARM64AMode(i->ARM64in.LdSt64.amode);
1439 vex_printf(", ");
1440 ppHRegARM64(i->ARM64in.LdSt64.rD);
1441 }
1442 return;
1443 case ARM64in_LdSt32:
1444 if (i->ARM64in.LdSt32.isLoad) {
1445 vex_printf("ldruw ");
1446 ppHRegARM64(i->ARM64in.LdSt32.rD);
1447 vex_printf(", ");
1448 ppARM64AMode(i->ARM64in.LdSt32.amode);
1449 } else {
1450 vex_printf("strw ");
1451 ppARM64AMode(i->ARM64in.LdSt32.amode);
1452 vex_printf(", ");
1453 ppHRegARM64(i->ARM64in.LdSt32.rD);
1454 }
1455 return;
1456 case ARM64in_LdSt16:
1457 if (i->ARM64in.LdSt16.isLoad) {
1458 vex_printf("ldruh ");
1459 ppHRegARM64(i->ARM64in.LdSt16.rD);
1460 vex_printf(", ");
1461 ppARM64AMode(i->ARM64in.LdSt16.amode);
1462 } else {
1463 vex_printf("strh ");
1464 ppARM64AMode(i->ARM64in.LdSt16.amode);
1465 vex_printf(", ");
1466 ppHRegARM64(i->ARM64in.LdSt16.rD);
1467 }
1468 return;
1469 case ARM64in_LdSt8:
1470 if (i->ARM64in.LdSt8.isLoad) {
1471 vex_printf("ldrub ");
1472 ppHRegARM64(i->ARM64in.LdSt8.rD);
1473 vex_printf(", ");
1474 ppARM64AMode(i->ARM64in.LdSt8.amode);
1475 } else {
1476 vex_printf("strb ");
1477 ppARM64AMode(i->ARM64in.LdSt8.amode);
1478 vex_printf(", ");
1479 ppHRegARM64(i->ARM64in.LdSt8.rD);
1480 }
1481 return;
1482 case ARM64in_XDirect:
1483 vex_printf("(xDirect) ");
1484 vex_printf("if (%%pstate.%s) { ",
1485 showARM64CondCode(i->ARM64in.XDirect.cond));
1486 vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
1487 vex_printf("str x9,");
1488 ppARM64AMode(i->ARM64in.XDirect.amPC);
1489 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1490 i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
1491 vex_printf("blr x9 }");
1492 return;
1493 case ARM64in_XIndir:
1494 vex_printf("(xIndir) ");
1495 vex_printf("if (%%pstate.%s) { ",
1496 showARM64CondCode(i->ARM64in.XIndir.cond));
1497 vex_printf("str ");
1498 ppHRegARM64(i->ARM64in.XIndir.dstGA);
1499 vex_printf(",");
1500 ppARM64AMode(i->ARM64in.XIndir.amPC);
1501 vex_printf("; imm64 x9,$disp_cp_xindir; ");
1502 vex_printf("br x9 }");
1503 return;
1504 case ARM64in_XAssisted:
1505 vex_printf("(xAssisted) ");
1506 vex_printf("if (%%pstate.%s) { ",
1507 showARM64CondCode(i->ARM64in.XAssisted.cond));
1508 vex_printf("str ");
1509 ppHRegARM64(i->ARM64in.XAssisted.dstGA);
1510 vex_printf(",");
1511 ppARM64AMode(i->ARM64in.XAssisted.amPC);
1512 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1513 (Int)i->ARM64in.XAssisted.jk);
1514 vex_printf("imm64 x9,$disp_cp_xassisted; ");
1515 vex_printf("br x9 }");
1516 return;
1517 case ARM64in_CSel:
1518 vex_printf("csel ");
1519 ppHRegARM64(i->ARM64in.CSel.dst);
1520 vex_printf(", ");
1521 ppHRegARM64(i->ARM64in.CSel.argL);
1522 vex_printf(", ");
1523 ppHRegARM64(i->ARM64in.CSel.argR);
1524 vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
1525 return;
1526 case ARM64in_Call:
1527 vex_printf("call%s ",
1528 i->ARM64in.Call.cond==ARM64cc_AL
1529 ? " " : showARM64CondCode(i->ARM64in.Call.cond));
1530 vex_printf("0x%llx [nArgRegs=%d, ",
1531 i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
1532 ppRetLoc(i->ARM64in.Call.rloc);
1533 vex_printf("]");
1534 return;
1535 case ARM64in_AddToSP: {
1536 Int simm = i->ARM64in.AddToSP.simm;
1537 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1538 simm < 0 ? -simm : simm);
1539 return;
1540 }
1541 case ARM64in_FromSP:
1542 vex_printf("mov ");
1543 ppHRegARM64(i->ARM64in.FromSP.dst);
1544 vex_printf(", xsp");
1545 return;
1546 case ARM64in_Mul:
1547 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
1548 ppHRegARM64(i->ARM64in.Mul.dst);
1549 vex_printf(", ");
1550 ppHRegARM64(i->ARM64in.Mul.argL);
1551 vex_printf(", ");
1552 ppHRegARM64(i->ARM64in.Mul.argR);
1553 return;
1554
1555 case ARM64in_LdrEX: {
1556 const HChar* sz = " ";
1557 switch (i->ARM64in.LdrEX.szB) {
1558 case 1: sz = "b"; break;
1559 case 2: sz = "h"; break;
1560 case 4: case 8: break;
1561 default: vassert(0);
1562 }
1563 vex_printf("ldxr%s %c2, [x4]",
1564 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1565 return;
1566 }
1567 case ARM64in_StrEX: {
1568 const HChar* sz = " ";
1569 switch (i->ARM64in.StrEX.szB) {
1570 case 1: sz = "b"; break;
1571 case 2: sz = "h"; break;
1572 case 4: case 8: break;
1573 default: vassert(0);
1574 }
1575 vex_printf("stxr%s w0, %c2, [x4]",
1576 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1577 return;
1578 }
1579 case ARM64in_CAS: {
1580 vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
1581 return;
1582 }
1583 case ARM64in_MFence:
1584 vex_printf("(mfence) dsb sy; dmb sy; isb");
1585 return;
1586 case ARM64in_ClrEX:
1587 vex_printf("clrex #15");
1588 return;
1589 case ARM64in_VLdStH:
1590 if (i->ARM64in.VLdStH.isLoad) {
1591 vex_printf("ldr ");
1592 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1593 vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1594 ppHRegARM64(i->ARM64in.VLdStH.rN);
1595 vex_printf(")");
1596 } else {
1597 vex_printf("str ");
1598 vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1599 ppHRegARM64(i->ARM64in.VLdStH.rN);
1600 vex_printf("), ");
1601 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1602 }
1603 return;
1604 case ARM64in_VLdStS:
1605 if (i->ARM64in.VLdStS.isLoad) {
1606 vex_printf("ldr ");
1607 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1608 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1609 ppHRegARM64(i->ARM64in.VLdStS.rN);
1610 vex_printf(")");
1611 } else {
1612 vex_printf("str ");
1613 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1614 ppHRegARM64(i->ARM64in.VLdStS.rN);
1615 vex_printf("), ");
1616 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1617 }
1618 return;
1619 case ARM64in_VLdStD:
1620 if (i->ARM64in.VLdStD.isLoad) {
1621 vex_printf("ldr ");
1622 ppHRegARM64(i->ARM64in.VLdStD.dD);
1623 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1624 ppHRegARM64(i->ARM64in.VLdStD.rN);
1625 vex_printf(")");
1626 } else {
1627 vex_printf("str ");
1628 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1629 ppHRegARM64(i->ARM64in.VLdStD.rN);
1630 vex_printf("), ");
1631 ppHRegARM64(i->ARM64in.VLdStD.dD);
1632 }
1633 return;
1634 case ARM64in_VLdStQ:
1635 if (i->ARM64in.VLdStQ.isLoad)
1636 vex_printf("ld1.2d {");
1637 else
1638 vex_printf("st1.2d {");
1639 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1640 vex_printf("}, [");
1641 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1642 vex_printf("]");
1643 return;
1644 case ARM64in_VCvtI2F: {
1645 HChar syn = '?';
1646 UInt fszB = 0;
1647 UInt iszB = 0;
1648 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1649 vex_printf("%ccvtf ", syn);
1650 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1651 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1652 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1653 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1654 return;
1655 }
1656 case ARM64in_VCvtF2I: {
1657 HChar syn = '?';
1658 UInt fszB = 0;
1659 UInt iszB = 0;
1660 HChar rmo = '?';
1661 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1662 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1663 if (armRM < 4) rmo = "npmz"[armRM];
1664 vex_printf("fcvt%c%c ", rmo, syn);
1665 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1666 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1667 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1668 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1669 return;
1670 }
1671 case ARM64in_VCvtSD:
1672 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1673 if (i->ARM64in.VCvtSD.sToD) {
1674 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1675 vex_printf(", ");
1676 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1677 } else {
1678 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1679 vex_printf(", ");
1680 ppHRegARM64(i->ARM64in.VCvtSD.src);
1681 }
1682 return;
1683 case ARM64in_VCvtHS:
1684 vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1685 if (i->ARM64in.VCvtHS.hToS) {
1686 ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1687 vex_printf(", ");
1688 ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1689 } else {
1690 ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1691 vex_printf(", ");
1692 ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1693 }
1694 return;
1695 case ARM64in_VCvtHD:
1696 vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1697 if (i->ARM64in.VCvtHD.hToD) {
1698 ppHRegARM64(i->ARM64in.VCvtHD.dst);
1699 vex_printf(", ");
1700 ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1701 } else {
1702 ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1703 vex_printf(", ");
1704 ppHRegARM64(i->ARM64in.VCvtHD.src);
1705 }
1706 return;
1707 case ARM64in_VUnaryD:
1708 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1709 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1710 vex_printf(", ");
1711 ppHRegARM64(i->ARM64in.VUnaryD.src);
1712 return;
1713 case ARM64in_VUnaryS:
1714 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1715 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1716 vex_printf(", ");
1717 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1718 return;
1719 case ARM64in_VBinD:
1720 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1721 ppHRegARM64(i->ARM64in.VBinD.dst);
1722 vex_printf(", ");
1723 ppHRegARM64(i->ARM64in.VBinD.argL);
1724 vex_printf(", ");
1725 ppHRegARM64(i->ARM64in.VBinD.argR);
1726 return;
1727 case ARM64in_VBinS:
1728 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1729 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1730 vex_printf(", ");
1731 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1732 vex_printf(", ");
1733 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1734 return;
1735 case ARM64in_VCmpD:
1736 vex_printf("fcmp ");
1737 ppHRegARM64(i->ARM64in.VCmpD.argL);
1738 vex_printf(", ");
1739 ppHRegARM64(i->ARM64in.VCmpD.argR);
1740 return;
1741 case ARM64in_VCmpS:
1742 vex_printf("fcmp ");
1743 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1744 vex_printf(", ");
1745 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1746 return;
1747 case ARM64in_VFCSel: {
1748 void (*ppHRegARM64fp)(HReg)
1749 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1750 vex_printf("fcsel ");
1751 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1752 vex_printf(", ");
1753 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1754 vex_printf(", ");
1755 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1756 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1757 return;
1758 }
1759 case ARM64in_FPCR:
1760 if (i->ARM64in.FPCR.toFPCR) {
1761 vex_printf("msr fpcr, ");
1762 ppHRegARM64(i->ARM64in.FPCR.iReg);
1763 } else {
1764 vex_printf("mrs ");
1765 ppHRegARM64(i->ARM64in.FPCR.iReg);
1766 vex_printf(", fpcr");
1767 }
1768 return;
1769 case ARM64in_FPSR:
1770 if (i->ARM64in.FPSR.toFPSR) {
1771 vex_printf("msr fpsr, ");
1772 ppHRegARM64(i->ARM64in.FPSR.iReg);
1773 } else {
1774 vex_printf("mrs ");
1775 ppHRegARM64(i->ARM64in.FPSR.iReg);
1776 vex_printf(", fpsr");
1777 }
1778 return;
1779 case ARM64in_VBinV: {
1780 const HChar* nm = "??";
1781 const HChar* ar = "??";
1782 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1783 vex_printf("%s ", nm);
1784 ppHRegARM64(i->ARM64in.VBinV.dst);
1785 vex_printf(".%s, ", ar);
1786 ppHRegARM64(i->ARM64in.VBinV.argL);
1787 vex_printf(".%s, ", ar);
1788 ppHRegARM64(i->ARM64in.VBinV.argR);
1789 vex_printf(".%s", ar);
1790 return;
1791 }
1792 case ARM64in_VModifyV: {
1793 const HChar* nm = "??";
1794 const HChar* ar = "??";
1795 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1796 vex_printf("%s ", nm);
1797 ppHRegARM64(i->ARM64in.VModifyV.mod);
1798 vex_printf(".%s, ", ar);
1799 ppHRegARM64(i->ARM64in.VModifyV.arg);
1800 vex_printf(".%s", ar);
1801 return;
1802 }
1803 case ARM64in_VUnaryV: {
1804 const HChar* nm = "??";
1805 const HChar* ar = "??";
1806 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1807 vex_printf("%s ", nm);
1808 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1809 vex_printf(".%s, ", ar);
1810 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1811 vex_printf(".%s", ar);
1812 return;
1813 }
1814 case ARM64in_VNarrowV: {
1815 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1816 const HChar* darr[3] = { "8b", "4h", "2s" };
1817 const HChar* sarr[3] = { "8h", "4s", "2d" };
1818 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1819 vex_printf("%s ", nm);
1820 ppHRegARM64(i->ARM64in.VNarrowV.dst);
1821 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1822 ppHRegARM64(i->ARM64in.VNarrowV.src);
1823 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1824 return;
1825 }
1826 case ARM64in_VShiftImmV: {
1827 const HChar* nm = "??";
1828 const HChar* ar = "??";
1829 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
1830 vex_printf("%s ", nm);
1831 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1832 vex_printf(".%s, ", ar);
1833 ppHRegARM64(i->ARM64in.VShiftImmV.src);
1834 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1835 return;
1836 }
1837 case ARM64in_VExtV: {
1838 vex_printf("ext ");
1839 ppHRegARM64(i->ARM64in.VExtV.dst);
1840 vex_printf(".16b, ");
1841 ppHRegARM64(i->ARM64in.VExtV.srcLo);
1842 vex_printf(".16b, ");
1843 ppHRegARM64(i->ARM64in.VExtV.srcHi);
1844 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1845 return;
1846 }
1847 case ARM64in_VImmQ:
1848 vex_printf("qimm ");
1849 ppHRegARM64(i->ARM64in.VImmQ.rQ);
1850 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1851 return;
1852 case ARM64in_VDfromX:
1853 vex_printf("fmov ");
1854 ppHRegARM64(i->ARM64in.VDfromX.rD);
1855 vex_printf(", ");
1856 ppHRegARM64(i->ARM64in.VDfromX.rX);
1857 return;
1858 case ARM64in_VQfromX:
1859 vex_printf("fmov ");
1860 ppHRegARM64(i->ARM64in.VQfromX.rQ);
1861 vex_printf(".d[0], ");
1862 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1863 return;
1864 case ARM64in_VQfromXX:
1865 vex_printf("qFromXX ");
1866 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1867 vex_printf(", ");
1868 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1869 vex_printf(", ");
1870 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1871 return;
1872 case ARM64in_VXfromQ:
1873 vex_printf("fmov ");
1874 ppHRegARM64(i->ARM64in.VXfromQ.rX);
1875 vex_printf(", ");
1876 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1877 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1878 return;
1879 case ARM64in_VXfromDorS:
1880 vex_printf("fmov ");
1881 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1882 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1883 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1884 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1885 return;
1886 case ARM64in_VMov: {
1887 UChar aux = '?';
1888 switch (i->ARM64in.VMov.szB) {
1889 case 16: aux = 'q'; break;
1890 case 8: aux = 'd'; break;
1891 case 4: aux = 's'; break;
1892 default: break;
1893 }
1894 vex_printf("mov(%c) ", aux);
1895 ppHRegARM64(i->ARM64in.VMov.dst);
1896 vex_printf(", ");
1897 ppHRegARM64(i->ARM64in.VMov.src);
1898 return;
1899 }
1900 case ARM64in_EvCheck:
1901 vex_printf("(evCheck) ldr w9,");
1902 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1903 vex_printf("; subs w9,w9,$1; str w9,");
1904 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1905 vex_printf("; bpl nofail; ldr x9,");
1906 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1907 vex_printf("; br x9; nofail:");
1908 return;
1909 case ARM64in_ProfInc:
1910 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1911                    "ldr x8,[x9]; add x8,x8,#1; str x8,[x9]");
1912 return;
1913 default:
1914 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
1915 vpanic("ppARM64Instr(1)");
1916 return;
1917 }
1918 }
1919
1920
1921 /* --------- Helpers for register allocation. --------- */
1922
1923 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
1924 {
1925 vassert(mode64 == True);
1926 initHRegUsage(u);
1927 switch (i->tag) {
1928 case ARM64in_Arith:
1929 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
1930 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
1931 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
1932 return;
1933 case ARM64in_Cmp:
1934 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
1935 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
1936 return;
1937 case ARM64in_Logic:
1938 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
1939 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
1940 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
1941 return;
1942 case ARM64in_Test:
1943 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
1944 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
1945 return;
1946 case ARM64in_Shift:
1947 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
1948 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
1949 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
1950 return;
1951 case ARM64in_Unary:
1952 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
1953 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
1954 return;
1955 case ARM64in_MovI:
1956 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
1957 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
1958 return;
1959 case ARM64in_Imm64:
1960 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
1961 return;
1962 case ARM64in_LdSt64:
1963 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
1964 if (i->ARM64in.LdSt64.isLoad) {
1965 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
1966 } else {
1967 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
1968 }
1969 return;
1970 case ARM64in_LdSt32:
1971 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
1972 if (i->ARM64in.LdSt32.isLoad) {
1973 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
1974 } else {
1975 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
1976 }
1977 return;
1978 case ARM64in_LdSt16:
1979 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
1980 if (i->ARM64in.LdSt16.isLoad) {
1981 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
1982 } else {
1983 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
1984 }
1985 return;
1986 case ARM64in_LdSt8:
1987 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
1988 if (i->ARM64in.LdSt8.isLoad) {
1989 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
1990 } else {
1991 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
1992 }
1993 return;
1994 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1995 conditionally exit the block. Hence we only need to list (1)
1996 the registers that they read, and (2) the registers that they
1997 write in the case where the block is not exited. (2) is
1998 empty, hence only (1) is relevant here. */
1999 case ARM64in_XDirect:
2000 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
2001 return;
2002 case ARM64in_XIndir:
2003 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
2004 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
2005 return;
2006 case ARM64in_XAssisted:
2007 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
2008 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
2009 return;
2010 case ARM64in_CSel:
2011 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
2012 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
2013 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
2014 return;
2015 case ARM64in_Call:
2016 /* logic and comments copied/modified from x86 back end */
2017 /* This is a bit subtle. */
2018 /* First off, claim it trashes all the caller-saved regs
2019 which fall within the register allocator's jurisdiction.
2020 These I believe to be x0 to x7 and the 128-bit vector
2021 registers in use, q16 .. q20. */
2022 addHRegUse(u, HRmWrite, hregARM64_X0());
2023 addHRegUse(u, HRmWrite, hregARM64_X1());
2024 addHRegUse(u, HRmWrite, hregARM64_X2());
2025 addHRegUse(u, HRmWrite, hregARM64_X3());
2026 addHRegUse(u, HRmWrite, hregARM64_X4());
2027 addHRegUse(u, HRmWrite, hregARM64_X5());
2028 addHRegUse(u, HRmWrite, hregARM64_X6());
2029 addHRegUse(u, HRmWrite, hregARM64_X7());
2030 addHRegUse(u, HRmWrite, hregARM64_Q16());
2031 addHRegUse(u, HRmWrite, hregARM64_Q17());
2032 addHRegUse(u, HRmWrite, hregARM64_Q18());
2033 addHRegUse(u, HRmWrite, hregARM64_Q19());
2034 addHRegUse(u, HRmWrite, hregARM64_Q20());
2035 /* Now we have to state any parameter-carrying registers
2036 which might be read. This depends on nArgRegs. */
2037 switch (i->ARM64in.Call.nArgRegs) {
2038 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2039 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2040 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2041 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2042 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2043 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2044 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2045 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2046 case 0: break;
2047 default: vpanic("getRegUsage_ARM64:Call:regparms");
2048 }
2049 /* Finally, there is the issue that the insn trashes a
2050 register because the literal target address has to be
2051 loaded into a register. However, we reserve x9 for that
2052 purpose so there's no further complexity here. Stating x9
2053 as trashed is pointless since it's not under the control
2054 of the allocator, but what the hell. */
2055 addHRegUse(u, HRmWrite, hregARM64_X9());
2056 return;
2057 case ARM64in_AddToSP:
2058 /* Only changes SP, but regalloc doesn't control that, hence
2059 we don't care. */
2060 return;
2061 case ARM64in_FromSP:
2062 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2063 return;
2064 case ARM64in_Mul:
2065 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2066 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2067 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2068 return;
2069 case ARM64in_LdrEX:
2070 addHRegUse(u, HRmRead, hregARM64_X4());
2071 addHRegUse(u, HRmWrite, hregARM64_X2());
2072 return;
2073 case ARM64in_StrEX:
2074 addHRegUse(u, HRmRead, hregARM64_X4());
2075 addHRegUse(u, HRmWrite, hregARM64_X0());
2076 addHRegUse(u, HRmRead, hregARM64_X2());
2077 return;
2078 case ARM64in_CAS:
2079 addHRegUse(u, HRmRead, hregARM64_X3());
2080 addHRegUse(u, HRmRead, hregARM64_X5());
2081 addHRegUse(u, HRmRead, hregARM64_X7());
2082 addHRegUse(u, HRmWrite, hregARM64_X1());
2083 /* Pointless to state this since X8 is not available to RA. */
2084 addHRegUse(u, HRmWrite, hregARM64_X8());
2085 break;
2086 case ARM64in_MFence:
2087 return;
2088 case ARM64in_ClrEX:
2089 return;
2090 case ARM64in_VLdStH:
2091 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2092 if (i->ARM64in.VLdStH.isLoad) {
2093 addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2094 } else {
2095 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2096 }
2097 return;
2098 case ARM64in_VLdStS:
2099 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2100 if (i->ARM64in.VLdStS.isLoad) {
2101 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2102 } else {
2103 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2104 }
2105 return;
2106 case ARM64in_VLdStD:
2107 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2108 if (i->ARM64in.VLdStD.isLoad) {
2109 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2110 } else {
2111 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2112 }
2113 return;
2114 case ARM64in_VLdStQ:
2115 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2116 if (i->ARM64in.VLdStQ.isLoad)
2117 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2118 else
2119 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2120 return;
2121 case ARM64in_VCvtI2F:
2122 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2123 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2124 return;
2125 case ARM64in_VCvtF2I:
2126 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2127 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2128 return;
2129 case ARM64in_VCvtSD:
2130 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2131 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2132 return;
2133 case ARM64in_VCvtHS:
2134 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2135 addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
2136 return;
2137 case ARM64in_VCvtHD:
2138 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2139 addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
2140 return;
2141 case ARM64in_VUnaryD:
2142 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2143 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2144 return;
2145 case ARM64in_VUnaryS:
2146 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2147 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2148 return;
2149 case ARM64in_VBinD:
2150 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2151 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2152 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2153 return;
2154 case ARM64in_VBinS:
2155 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2156 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2157 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2158 return;
2159 case ARM64in_VCmpD:
2160 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2161 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2162 return;
2163 case ARM64in_VCmpS:
2164 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2165 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2166 return;
2167 case ARM64in_VFCSel:
2168 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2169 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2170 addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2171 return;
2172 case ARM64in_FPCR:
2173 if (i->ARM64in.FPCR.toFPCR)
2174 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2175 else
2176 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2177 return;
2178 case ARM64in_FPSR:
2179 if (i->ARM64in.FPSR.toFPSR)
2180 addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2181 else
2182 addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2183 return;
2184 case ARM64in_VBinV:
2185 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2186 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2187 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2188 return;
2189 case ARM64in_VModifyV:
2190 addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2191 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2192 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2193 return;
2194 case ARM64in_VUnaryV:
2195 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2196 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2197 return;
2198 case ARM64in_VNarrowV:
2199 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2200 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2201 return;
2202 case ARM64in_VShiftImmV:
2203 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2204 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2205 return;
2206 case ARM64in_VExtV:
2207 addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2208 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2209 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2210 return;
2211 case ARM64in_VImmQ:
2212 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2213 return;
2214 case ARM64in_VDfromX:
2215 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2216 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2217 return;
2218 case ARM64in_VQfromX:
2219 addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2220 addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
2221 return;
2222 case ARM64in_VQfromXX:
2223 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2224 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2225 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2226 return;
2227 case ARM64in_VXfromQ:
2228 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2229 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2230 return;
2231 case ARM64in_VXfromDorS:
2232 addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2233 addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
2234 return;
2235 case ARM64in_VMov:
2236 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2237 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2238 return;
2239 case ARM64in_EvCheck:
2240 /* We expect both amodes only to mention x21, so this is in
2241 fact pointless, since x21 isn't allocatable, but
2242 anyway.. */
2243 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2244 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2245 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2246 return;
2247 case ARM64in_ProfInc:
2248 /* Again, pointless to actually state these since neither
2249 is available to RA. */
2250 addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2251 addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2252 return;
2253 default:
2254 ppARM64Instr(i);
2255 vpanic("getRegUsage_ARM64Instr");
2256 }
2257 }
2258
2259
2260 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2261 {
2262 vassert(mode64 == True);
2263 switch (i->tag) {
2264 case ARM64in_Arith:
2265 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2266 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2267 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2268 return;
2269 case ARM64in_Cmp:
2270 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2271 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2272 return;
2273 case ARM64in_Logic:
2274 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2275 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2276 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2277 return;
2278 case ARM64in_Test:
2279 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2280          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2281 return;
2282 case ARM64in_Shift:
2283 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2284 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2285 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2286 return;
2287 case ARM64in_Unary:
2288 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2289 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2290 return;
2291 case ARM64in_MovI:
2292 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2293 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2294 return;
2295 case ARM64in_Imm64:
2296 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2297 return;
2298 case ARM64in_LdSt64:
2299 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2300 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2301 return;
2302 case ARM64in_LdSt32:
2303 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2304 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2305 return;
2306 case ARM64in_LdSt16:
2307 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2308 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2309 return;
2310 case ARM64in_LdSt8:
2311 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2312 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2313 return;
2314 case ARM64in_XDirect:
2315 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2316 return;
2317 case ARM64in_XIndir:
2318 i->ARM64in.XIndir.dstGA
2319 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2320 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2321 return;
2322 case ARM64in_XAssisted:
2323 i->ARM64in.XAssisted.dstGA
2324 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2325 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2326 return;
2327 case ARM64in_CSel:
2328 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2329 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2330 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2331 return;
2332 case ARM64in_Call:
2333 return;
2334 case ARM64in_AddToSP:
2335 return;
2336 case ARM64in_FromSP:
2337 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2338 return;
2339 case ARM64in_Mul:
2340 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2341 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2342 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2343 break;
2344 case ARM64in_LdrEX:
2345 return;
2346 case ARM64in_StrEX:
2347 return;
2348 case ARM64in_CAS:
2349 return;
2350 case ARM64in_MFence:
2351 return;
2352 case ARM64in_ClrEX:
2353 return;
2354 case ARM64in_VLdStH:
2355 i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2356 i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2357 return;
2358 case ARM64in_VLdStS:
2359 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2360 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2361 return;
2362 case ARM64in_VLdStD:
2363 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2364 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2365 return;
2366 case ARM64in_VLdStQ:
2367 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2368 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2369 return;
2370 case ARM64in_VCvtI2F:
2371 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2372 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2373 return;
2374 case ARM64in_VCvtF2I:
2375 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2376 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2377 return;
2378 case ARM64in_VCvtSD:
2379 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2380 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2381 return;
2382 case ARM64in_VCvtHS:
2383 i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2384 i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2385 return;
2386 case ARM64in_VCvtHD:
2387 i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2388 i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2389 return;
2390 case ARM64in_VUnaryD:
2391 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2392 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2393 return;
2394 case ARM64in_VUnaryS:
2395 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2396 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2397 return;
2398 case ARM64in_VBinD:
2399 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2400 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2401 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2402 return;
2403 case ARM64in_VBinS:
2404 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2405 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2406 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2407 return;
2408 case ARM64in_VCmpD:
2409 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2410 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2411 return;
2412 case ARM64in_VCmpS:
2413 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2414 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2415 return;
2416 case ARM64in_VFCSel:
2417 i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2418 i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2419 i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2420 return;
2421 case ARM64in_FPCR:
2422 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2423 return;
2424 case ARM64in_FPSR:
2425 i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2426 return;
2427 case ARM64in_VBinV:
2428 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2429 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2430 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2431 return;
2432 case ARM64in_VModifyV:
2433 i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2434 i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2435 return;
2436 case ARM64in_VUnaryV:
2437 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2438 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2439 return;
2440 case ARM64in_VNarrowV:
2441 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2442 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2443 return;
2444 case ARM64in_VShiftImmV:
2445 i->ARM64in.VShiftImmV.dst
2446 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2447 i->ARM64in.VShiftImmV.src
2448 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2449 return;
2450 case ARM64in_VExtV:
2451 i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2452 i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2453 i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2454 return;
2455 case ARM64in_VImmQ:
2456 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2457 return;
2458 case ARM64in_VDfromX:
2459 i->ARM64in.VDfromX.rD
2460 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2461 i->ARM64in.VDfromX.rX
2462 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2463 return;
2464 case ARM64in_VQfromX:
2465 i->ARM64in.VQfromX.rQ
2466 = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2467 i->ARM64in.VQfromX.rXlo
2468 = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2469 return;
2470 case ARM64in_VQfromXX:
2471 i->ARM64in.VQfromXX.rQ
2472 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2473 i->ARM64in.VQfromXX.rXhi
2474 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2475 i->ARM64in.VQfromXX.rXlo
2476 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2477 return;
2478 case ARM64in_VXfromQ:
2479 i->ARM64in.VXfromQ.rX
2480 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2481 i->ARM64in.VXfromQ.rQ
2482 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2483 return;
2484 case ARM64in_VXfromDorS:
2485 i->ARM64in.VXfromDorS.rX
2486 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2487 i->ARM64in.VXfromDorS.rDorS
2488 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2489 return;
2490 case ARM64in_VMov:
2491 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2492 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2493 return;
2494 case ARM64in_EvCheck:
2495 /* We expect both amodes only to mention x21, so this is in
2496 fact pointless, since x21 isn't allocatable, but
2497 anyway.. */
2498 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2499 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2500 return;
2501 case ARM64in_ProfInc:
2502 /* hardwires x8 and x9 -- nothing to modify. */
2503 return;
2504 default:
2505 ppARM64Instr(i);
2506 vpanic("mapRegs_ARM64Instr");
2507 }
2508 }
2509
2510 /* Figure out if i represents a reg-reg move, and if so assign the
2511 source and destination to *src and *dst. If in doubt say No. Used
2512 by the register allocator to do move coalescing.
2513 */
2514 Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
2515 {
2516 switch (i->tag) {
2517 case ARM64in_MovI:
2518 *src = i->ARM64in.MovI.src;
2519 *dst = i->ARM64in.MovI.dst;
2520 return True;
2521 case ARM64in_VMov:
2522 *src = i->ARM64in.VMov.src;
2523 *dst = i->ARM64in.VMov.dst;
2524 return True;
2525 default:
2526 break;
2527 }
2528
2529 return False;
2530 }
2531
2532
2533 /* Generate arm spill/reload instructions under the direction of the
2534 register allocator. Note it's critical these don't write the
2535 condition codes. */
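/* Spill slots are addressed relative to x21, the baseblock (guest state)
   pointer.  For 128-bit (Q) registers the slot address must first be
   formed in x9 with an ADD, since the ST1/LD1 forms used below take only
   a plain register address with no immediate offset. */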
2536
2537 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2538 HReg rreg, Int offsetB, Bool mode64 )
2539 {
2540 HRegClass rclass;
2541 vassert(offsetB >= 0);
2542 vassert(!hregIsVirtual(rreg));
2543 vassert(mode64 == True);
2544 *i1 = *i2 = NULL;
2545 rclass = hregClass(rreg);
2546 switch (rclass) {
2547 case HRcInt64:
2548 vassert(0 == (offsetB & 7));
2549 offsetB >>= 3;
2550 vassert(offsetB < 4096);
2551 *i1 = ARM64Instr_LdSt64(
2552 False/*!isLoad*/,
2553 rreg,
2554 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2555 );
2556 return;
2557 case HRcFlt64:
2558 vassert(0 == (offsetB & 7));
2559 vassert(offsetB >= 0 && offsetB < 32768);
2560 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2561 rreg, hregARM64_X21(), offsetB);
2562 return;
2563 case HRcVec128: {
2564 HReg x21 = hregARM64_X21(); // baseblock
2565 HReg x9 = hregARM64_X9(); // spill temporary
2566 vassert(0 == (offsetB & 15)); // check sane alignment
2567 vassert(offsetB < 4096);
2568 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2569 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2570 return;
2571 }
2572 default:
2573 ppHRegClass(rclass);
2574          vpanic("genSpill_ARM64: unimplemented regclass");
2575 }
2576 }
2577
2578 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2579 HReg rreg, Int offsetB, Bool mode64 )
2580 {
2581 HRegClass rclass;
2582 vassert(offsetB >= 0);
2583 vassert(!hregIsVirtual(rreg));
2584 vassert(mode64 == True);
2585 *i1 = *i2 = NULL;
2586 rclass = hregClass(rreg);
2587 switch (rclass) {
2588 case HRcInt64:
2589 vassert(0 == (offsetB & 7));
2590 offsetB >>= 3;
2591 vassert(offsetB < 4096);
2592 *i1 = ARM64Instr_LdSt64(
2593 True/*isLoad*/,
2594 rreg,
2595 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2596 );
2597 return;
2598 case HRcFlt64:
2599 vassert(0 == (offsetB & 7));
2600 vassert(offsetB >= 0 && offsetB < 32768);
2601 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2602 rreg, hregARM64_X21(), offsetB);
2603 return;
2604 case HRcVec128: {
2605 HReg x21 = hregARM64_X21(); // baseblock
2606 HReg x9 = hregARM64_X9(); // spill temporary
2607 vassert(0 == (offsetB & 15)); // check sane alignment
2608 vassert(offsetB < 4096);
2609 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2610 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2611 return;
2612 }
2613 default:
2614 ppHRegClass(rclass);
2615          vpanic("genReload_ARM64: unimplemented regclass");
2616 }
2617 }
2618
2619
2620 /* Emit an instruction into buf and return the number of bytes used.
2621 Note that buf is not the insn's final place, and therefore it is
2622 imperative to emit position-independent code. */
2623
2624 static inline UInt iregEnc ( HReg r )
2625 {
2626 UInt n;
2627 vassert(hregClass(r) == HRcInt64);
2628 vassert(!hregIsVirtual(r));
2629 n = hregEncoding(r);
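   /* Encodings 0 .. 30 only: encoding 31 would denote SP/XZR, which never
      appears here as an allocatable integer register. */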
2630 vassert(n <= 30);
2631 return n;
2632 }
2633
2634 static inline UInt dregEnc ( HReg r )
2635 {
2636 UInt n;
2637 vassert(hregClass(r) == HRcFlt64);
2638 vassert(!hregIsVirtual(r));
2639 n = hregEncoding(r);
2640 vassert(n <= 31);
2641 return n;
2642 }
2643
2644 static inline UInt qregEnc ( HReg r )
2645 {
2646 UInt n;
2647 vassert(hregClass(r) == HRcVec128);
2648 vassert(!hregIsVirtual(r));
2649 n = hregEncoding(r);
2650 vassert(n <= 31);
2651 return n;
2652 }
2653
2654 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2655 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
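/* e.g. BITS4(1,0,1,1) == 0xB.  These constants are the fixed bit-field
   pieces from which the 32-bit instruction words below are assembled. */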
2656
2657 #define X00 BITS4(0,0, 0,0)
2658 #define X01 BITS4(0,0, 0,1)
2659 #define X10 BITS4(0,0, 1,0)
2660 #define X11 BITS4(0,0, 1,1)
2661
2662 #define X000 BITS4(0, 0,0,0)
2663 #define X001 BITS4(0, 0,0,1)
2664 #define X010 BITS4(0, 0,1,0)
2665 #define X011 BITS4(0, 0,1,1)
2666 #define X100 BITS4(0, 1,0,0)
2667 #define X101 BITS4(0, 1,0,1)
2668 #define X110 BITS4(0, 1,1,0)
2669 #define X111 BITS4(0, 1,1,1)
2670
2671 #define X0000 BITS4(0,0,0,0)
2672 #define X0001 BITS4(0,0,0,1)
2673 #define X0010 BITS4(0,0,1,0)
2674 #define X0011 BITS4(0,0,1,1)
2675
2676 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2677 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
2678
2679 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2680 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2681 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2682 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2683 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2684 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2685 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2686 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2687 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2688
2689 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2690 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2691 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2692 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2693 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2694 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2695 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2696 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2697 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2698 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2699 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2700 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2701 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2702 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2703 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
2704 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
2705 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
2706 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
2707 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
2708 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
2709 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
2710 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
2711 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
2712 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
2713 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
2714 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
2715 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
2716 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
2717 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
2718 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
2719 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
2720 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
2721 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
2722 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
2723 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
2724 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
2725 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
2726 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
2727 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
2728 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
2729 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
2730 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
2731 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
2732 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
2733 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
2734 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
2735 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
2736
2737 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
2738 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
2739 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
2740 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
2741
2742 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
2743 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
2744 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
2745 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
2746 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
2747 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
2748 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
2749 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
2750 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
2751 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
2752 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
2753 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
2754 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
2755 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
2756 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
2757 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
2758 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
2759 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
2760 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
2761 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
2762 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
2763 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
2764 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
2765 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
2766 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
2767 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
2768 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
2769 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
2770 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
2771 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
2772 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
2773 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
2774
2775
2776 /* --- 4 fields --- */
2777
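/* Each X_<w1>_..._<wn> helper packs its arguments into a 32-bit
   instruction word, first argument in the most significant position; the
   field widths named in the function sum to 32 and are checked by the
   vasserts below. */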
2778 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
2779 vassert(8+19+1+4 == 32);
2780 vassert(f1 < (1<<8));
2781 vassert(f2 < (1<<19));
2782 vassert(f3 < (1<<1));
2783 vassert(f4 < (1<<4));
2784 UInt w = 0;
2785 w = (w << 8) | f1;
2786 w = (w << 19) | f2;
2787 w = (w << 1) | f3;
2788 w = (w << 4) | f4;
2789 return w;
2790 }
2791
2792 /* --- 5 fields --- */
2793
2794 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
2795 UInt f3, UInt f4, UInt f5 ) {
2796 vassert(3+6+2+16+5 == 32);
2797 vassert(f1 < (1<<3));
2798 vassert(f2 < (1<<6));
2799 vassert(f3 < (1<<2));
2800 vassert(f4 < (1<<16));
2801 vassert(f5 < (1<<5));
2802 UInt w = 0;
2803 w = (w << 3) | f1;
2804 w = (w << 6) | f2;
2805 w = (w << 2) | f3;
2806 w = (w << 16) | f4;
2807 w = (w << 5) | f5;
2808 return w;
2809 }
2810
2811 /* --- 6 fields --- */
2812
2813 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
2814 UInt f4, UInt f5, UInt f6 ) {
2815 vassert(2+6+2+12+5+5 == 32);
2816 vassert(f1 < (1<<2));
2817 vassert(f2 < (1<<6));
2818 vassert(f3 < (1<<2));
2819 vassert(f4 < (1<<12));
2820 vassert(f5 < (1<<5));
2821 vassert(f6 < (1<<5));
2822 UInt w = 0;
2823 w = (w << 2) | f1;
2824 w = (w << 6) | f2;
2825 w = (w << 2) | f3;
2826 w = (w << 12) | f4;
2827 w = (w << 5) | f5;
2828 w = (w << 5) | f6;
2829 return w;
2830 }
2831
2832 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
2833 UInt f4, UInt f5, UInt f6 ) {
2834 vassert(3+8+5+6+5+5 == 32);
2835 vassert(f1 < (1<<3));
2836 vassert(f2 < (1<<8));
2837 vassert(f3 < (1<<5));
2838 vassert(f4 < (1<<6));
2839 vassert(f5 < (1<<5));
2840 vassert(f6 < (1<<5));
2841 UInt w = 0;
2842 w = (w << 3) | f1;
2843 w = (w << 8) | f2;
2844 w = (w << 5) | f3;
2845 w = (w << 6) | f4;
2846 w = (w << 5) | f5;
2847 w = (w << 5) | f6;
2848 return w;
2849 }
2850
2851 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
2852 UInt f4, UInt f5, UInt f6 ) {
2853    vassert(3+5+8+6+5+5 == 32);
2854 vassert(f1 < (1<<3));
2855 vassert(f2 < (1<<5));
2856 vassert(f3 < (1<<8));
2857 vassert(f4 < (1<<6));
2858 vassert(f5 < (1<<5));
2859 vassert(f6 < (1<<5));
2860 UInt w = 0;
2861 w = (w << 3) | f1;
2862 w = (w << 5) | f2;
2863 w = (w << 8) | f3;
2864 w = (w << 6) | f4;
2865 w = (w << 5) | f5;
2866 w = (w << 5) | f6;
2867 return w;
2868 }
2869
2870 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
2871 UInt f4, UInt f5, UInt f6 ) {
2872 vassert(3+6+7+6+5+5 == 32);
2873 vassert(f1 < (1<<3));
2874 vassert(f2 < (1<<6));
2875 vassert(f3 < (1<<7));
2876 vassert(f4 < (1<<6));
2877 vassert(f5 < (1<<5));
2878 vassert(f6 < (1<<5));
2879 UInt w = 0;
2880 w = (w << 3) | f1;
2881 w = (w << 6) | f2;
2882 w = (w << 7) | f3;
2883 w = (w << 6) | f4;
2884 w = (w << 5) | f5;
2885 w = (w << 5) | f6;
2886 return w;
2887 }
2888
2889 /* --- 7 fields --- */
2890
2891 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
2892 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2893 vassert(2+6+3+9+2+5+5 == 32);
2894 vassert(f1 < (1<<2));
2895 vassert(f2 < (1<<6));
2896 vassert(f3 < (1<<3));
2897 vassert(f4 < (1<<9));
2898 vassert(f5 < (1<<2));
2899 vassert(f6 < (1<<5));
2900 vassert(f7 < (1<<5));
2901 UInt w = 0;
2902 w = (w << 2) | f1;
2903 w = (w << 6) | f2;
2904 w = (w << 3) | f3;
2905 w = (w << 9) | f4;
2906 w = (w << 2) | f5;
2907 w = (w << 5) | f6;
2908 w = (w << 5) | f7;
2909 return w;
2910 }
2911
2912 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
2913 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2914 vassert(3+6+1+6+6+5+5 == 32);
2915 vassert(f1 < (1<<3));
2916 vassert(f2 < (1<<6));
2917 vassert(f3 < (1<<1));
2918 vassert(f4 < (1<<6));
2919 vassert(f5 < (1<<6));
2920 vassert(f6 < (1<<5));
2921 vassert(f7 < (1<<5));
2922 UInt w = 0;
2923 w = (w << 3) | f1;
2924 w = (w << 6) | f2;
2925 w = (w << 1) | f3;
2926 w = (w << 6) | f4;
2927 w = (w << 6) | f5;
2928 w = (w << 5) | f6;
2929 w = (w << 5) | f7;
2930 return w;
2931 }
2932
2933
2934 //ZZ #define X0000 BITS4(0,0,0,0)
2935 //ZZ #define X0001 BITS4(0,0,0,1)
2936 //ZZ #define X0010 BITS4(0,0,1,0)
2937 //ZZ #define X0011 BITS4(0,0,1,1)
2938 //ZZ #define X0100 BITS4(0,1,0,0)
2939 //ZZ #define X0101 BITS4(0,1,0,1)
2940 //ZZ #define X0110 BITS4(0,1,1,0)
2941 //ZZ #define X0111 BITS4(0,1,1,1)
2942 //ZZ #define X1000 BITS4(1,0,0,0)
2943 //ZZ #define X1001 BITS4(1,0,0,1)
2944 //ZZ #define X1010 BITS4(1,0,1,0)
2945 //ZZ #define X1011 BITS4(1,0,1,1)
2946 //ZZ #define X1100 BITS4(1,1,0,0)
2947 //ZZ #define X1101 BITS4(1,1,0,1)
2948 //ZZ #define X1110 BITS4(1,1,1,0)
2949 //ZZ #define X1111 BITS4(1,1,1,1)
2950 /*
2951 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2952 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2953 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2954 (((zzx3) & 0xF) << 12))
2955
2956 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2957 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2958 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2959 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2960
2961 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2962 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2963 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2964 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2965
2966 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2967 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2968 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2969 (((zzx0) & 0xF) << 0))
2970
2971 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2972 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2973 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2974 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2975 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2976
2977 #define XX______(zzx7,zzx6) \
2978 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2979 */
2980
2981
2982 /* Get an immediate into a register, using only that register. */
2983 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
2984 {
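   /* Example: imm64 == 0x0000500000000000 has only halfword 2 nonzero, so
      the code below emits just "movz xD, #0x5000, lsl #32"; a value with
      all four halfwords nonzero costs one MOVZ plus three MOVKs. */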
2985 if (imm64 == 0) {
2986 // This has to be special-cased, since the logic below
2987 // will leave the register unchanged in this case.
2988 // MOVZ xD, #0, LSL #0
2989 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
2990 return p;
2991 }
2992
2993 // There must be at least one non-zero halfword. Find the
2994 // lowest nonzero such, and use MOVZ to install it and zero
2995 // out the rest of the register.
2996 UShort h[4];
2997 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
2998 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
2999 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3000 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3001
3002 UInt i;
3003 for (i = 0; i < 4; i++) {
3004 if (h[i] != 0)
3005 break;
3006 }
3007 vassert(i < 4);
3008
3009 // MOVZ xD, h[i], LSL (16*i)
3010 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3011
3012 // Work on upwards through h[i], using MOVK to stuff in any
3013 // remaining nonzero elements.
3014 i++;
3015 for (; i < 4; i++) {
3016 if (h[i] == 0)
3017 continue;
3018 // MOVK xD, h[i], LSL (16*i)
3019 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3020 }
3021
3022 return p;
3023 }
3024
3025 /* Get an immediate into a register, using only that register, and
3026 generating exactly 4 instructions, regardless of the value of the
3027 immediate. This is used when generating sections of code that need
3028 to be patched later, so as to guarantee a specific size. */
3029 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3030 {
3031 UShort h[4];
3032 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3033 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3034 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3035 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3036 // Work on upwards through h[i], using MOVK to stuff in the
3037 // remaining elements.
3038 UInt i;
3039 for (i = 0; i < 4; i++) {
3040 if (i == 0) {
3041 // MOVZ xD, h[0], LSL (16*0)
3042 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3043 } else {
3044 // MOVK xD, h[i], LSL (16*i)
3045 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3046 }
3047 }
3048 return p;
3049 }
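/* As the ppARM64Instr case for XDirect above suggests ("imm64-exactly4
   x9,$disp_cp_chain_me_to_..."), this is what loads the chain-me stub
   address into x9 at a patchable call site. */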
3050
3051 /* Check whether p points at a 4-insn sequence cooked up by
3052 imm64_to_ireg_EXACTLY4(). */
3053 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3054 {
3055 UShort h[4];
3056 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3057 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3058 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3059 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3060 // Work on upwards through h[i], using MOVK to stuff in the
3061 // remaining elements.
3062 UInt i;
3063 for (i = 0; i < 4; i++) {
3064 UInt expected;
3065 if (i == 0) {
3066 // MOVZ xD, h[0], LSL (16*0)
3067 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3068 } else {
3069 // MOVK xD, h[i], LSL (16*i)
3070 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3071 }
3072 if (p[i] != expected)
3073 return False;
3074 }
3075 return True;
3076 }
3077
3078
3079 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
3080 rD, using the given amode for the address. */
3081 static UInt* do_load_or_store8 ( UInt* p,
3082 Bool isLoad, UInt wD, ARM64AMode* am )
3083 {
3084 vassert(wD <= 30);
3085 if (am->tag == ARM64am_RI9) {
3086 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3087 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3088 */
3089 Int simm9 = am->ARM64am.RI9.simm9;
3090 vassert(-256 <= simm9 && simm9 <= 255);
3091 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3092 simm9 & 0x1FF, X00,
3093 iregEnc(am->ARM64am.RI9.reg), wD);
3094 *p++ = instr;
3095 return p;
3096 }
3097 if (am->tag == ARM64am_RI12) {
3098 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3099 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3100 */
3101 UInt uimm12 = am->ARM64am.RI12.uimm12;
3102 UInt scale = am->ARM64am.RI12.szB;
3103 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3104 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3105 vassert(xN <= 30);
3106 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3107 uimm12, xN, wD);
3108 *p++ = instr;
3109 return p;
3110 }
3111 if (am->tag == ARM64am_RR) {
3112       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3113          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3114 */
3115 UInt xN = iregEnc(am->ARM64am.RR.base);
3116 UInt xM = iregEnc(am->ARM64am.RR.index);
3117 vassert(xN <= 30);
3118 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3119 xM, X011010, xN, wD);
3120 *p++ = instr;
3121 return p;
3122 }
3123 vpanic("do_load_or_store8");
3124 vassert(0);
3125 }
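/* Illustrative mapping of the three amode kinds onto byte accesses,
   assuming wD = 2 and base register x21 (any values would do):

      RI9  { simm9 = -1 },          isLoad  =>  LDURB w2, [x21, #-1]
      RI12 { uimm12 = 3, szB = 1 }, store   =>  STRB  w2, [x21, #3]
      RR   { index = x7 },          isLoad  =>  LDRB  w2, [x21, x7]

   The unscaled RI9 form is the only one that can express small negative
   offsets; the RI12 form trades that for a larger positive range. */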
3126
3127
3128 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3129 rD, using the given amode for the address. */
3130 static UInt* do_load_or_store16 ( UInt* p,
3131 Bool isLoad, UInt wD, ARM64AMode* am )
3132 {
3133 vassert(wD <= 30);
3134 if (am->tag == ARM64am_RI9) {
3135 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3136 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3137 */
3138 Int simm9 = am->ARM64am.RI9.simm9;
3139 vassert(-256 <= simm9 && simm9 <= 255);
3140 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3141 simm9 & 0x1FF, X00,
3142 iregEnc(am->ARM64am.RI9.reg), wD);
3143 *p++ = instr;
3144 return p;
3145 }
3146 if (am->tag == ARM64am_RI12) {
3147 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3148 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3149 */
3150 UInt uimm12 = am->ARM64am.RI12.uimm12;
3151 UInt scale = am->ARM64am.RI12.szB;
3152 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3153 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3154 vassert(xN <= 30);
3155 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3156 uimm12, xN, wD);
3157 *p++ = instr;
3158 return p;
3159 }
3160 if (am->tag == ARM64am_RR) {
3161       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3162          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3163 */
3164 UInt xN = iregEnc(am->ARM64am.RR.base);
3165 UInt xM = iregEnc(am->ARM64am.RR.index);
3166 vassert(xN <= 30);
3167 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3168 xM, X011010, xN, wD);
3169 *p++ = instr;
3170 return p;
3171 }
3172 vpanic("do_load_or_store16");
3173 vassert(0);
3174 }
3175
3176
3177 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3178 rD, using the given amode for the address. */
3179 static UInt* do_load_or_store32 ( UInt* p,
3180 Bool isLoad, UInt wD, ARM64AMode* am )
3181 {
3182 vassert(wD <= 30);
3183 if (am->tag == ARM64am_RI9) {
3184 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3185 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3186 */
3187 Int simm9 = am->ARM64am.RI9.simm9;
3188 vassert(-256 <= simm9 && simm9 <= 255);
3189 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3190 simm9 & 0x1FF, X00,
3191 iregEnc(am->ARM64am.RI9.reg), wD);
3192 *p++ = instr;
3193 return p;
3194 }
3195 if (am->tag == ARM64am_RI12) {
3196 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3197 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3198 */
3199 UInt uimm12 = am->ARM64am.RI12.uimm12;
3200 UInt scale = am->ARM64am.RI12.szB;
3201 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3202 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3203 vassert(xN <= 30);
3204 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3205 uimm12, xN, wD);
3206 *p++ = instr;
3207 return p;
3208 }
3209 if (am->tag == ARM64am_RR) {
3210 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3211 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3212 */
3213 UInt xN = iregEnc(am->ARM64am.RR.base);
3214 UInt xM = iregEnc(am->ARM64am.RR.index);
3215 vassert(xN <= 30);
3216 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3217 xM, X011010, xN, wD);
3218 *p++ = instr;
3219 return p;
3220 }
3221 vpanic("do_load_or_store32");
3222 vassert(0);
3223 }
3224
3225
3226 /* Generate a 64 bit load or store to/from xD, using the given amode
3227 for the address. */
3228 static UInt* do_load_or_store64 ( UInt* p,
3229 Bool isLoad, UInt xD, ARM64AMode* am )
3230 {
3231 /* In all these cases, Rn can't be 31 since that means SP. */
3232 vassert(xD <= 30);
3233 if (am->tag == ARM64am_RI9) {
3234 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3235 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3236 */
3237 Int simm9 = am->ARM64am.RI9.simm9;
3238 vassert(-256 <= simm9 && simm9 <= 255);
3239 UInt xN = iregEnc(am->ARM64am.RI9.reg);
3240 vassert(xN <= 30);
3241 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3242 simm9 & 0x1FF, X00, xN, xD);
3243 *p++ = instr;
3244 return p;
3245 }
3246 if (am->tag == ARM64am_RI12) {
3247 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3248 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3249 */
3250 UInt uimm12 = am->ARM64am.RI12.uimm12;
3251 UInt scale = am->ARM64am.RI12.szB;
3252 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3253 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3254 vassert(xN <= 30);
3255 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3256 uimm12, xN, xD);
3257 *p++ = instr;
3258 return p;
3259 }
3260 if (am->tag == ARM64am_RR) {
3261 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3262 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3263 */
3264 UInt xN = iregEnc(am->ARM64am.RR.base);
3265 UInt xM = iregEnc(am->ARM64am.RR.index);
3266 vassert(xN <= 30);
3267 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3268 xM, X011010, xN, xD);
3269 *p++ = instr;
3270 return p;
3271 }
3272 vpanic("do_load_or_store64");
3273 vassert(0);
3274 }
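/* Illustrative: an RI12 amode { reg = x21, uimm12 = 64, szB = 8 } with
   xD = 9 and isLoad = True produces LDR x9, [x21, #512] -- the 12-bit
   immediate is scaled by 8 for 64-bit accesses.  An offset of 512 could
   not be reached via the unscaled RI9 form, whose range is only
   -256 .. 255. */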
3275
3276
3277 /* Emit an instruction into buf and return the number of bytes used.
3278 Note that buf is not the insn's final place, and therefore it is
3279 imperative to emit position-independent code. If the emitted
3280 instruction was a profiler inc, set *is_profInc to True, else
3281 leave it unchanged. */
3282
3283 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3284 UChar* buf, Int nbuf, const ARM64Instr* i,
3285 Bool mode64, VexEndness endness_host,
3286 const void* disp_cp_chain_me_to_slowEP,
3287 const void* disp_cp_chain_me_to_fastEP,
3288 const void* disp_cp_xindir,
3289 const void* disp_cp_xassisted )
3290 {
3291 UInt* p = (UInt*)buf;
3292 vassert(nbuf >= 32);
3293 vassert(mode64 == True);
3294 vassert(0 == (((HWord)buf) & 3));
3295
3296 switch (i->tag) {
3297 case ARM64in_Arith: {
3298 UInt rD = iregEnc(i->ARM64in.Arith.dst);
3299 UInt rN = iregEnc(i->ARM64in.Arith.argL);
3300 ARM64RIA* argR = i->ARM64in.Arith.argR;
3301 switch (argR->tag) {
3302 case ARM64riA_I12:
3303 *p++ = X_2_6_2_12_5_5(
3304 i->ARM64in.Arith.isAdd ? X10 : X11,
3305 X010001,
3306 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3307 argR->ARM64riA.I12.imm12, rN, rD
3308 );
3309 break;
3310 case ARM64riA_R: {
3311 UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3312 *p++ = X_3_8_5_6_5_5(
3313 i->ARM64in.Arith.isAdd ? X100 : X110,
3314 X01011000, rM, X000000, rN, rD
3315 );
3316 break;
3317 }
3318 default:
3319 goto bad;
3320 }
3321 goto done;
3322 }
3323 case ARM64in_Cmp: {
3324 UInt rD = 31; /* XZR, we are going to dump the result */
3325 UInt rN = iregEnc(i->ARM64in.Cmp.argL);
3326 ARM64RIA* argR = i->ARM64in.Cmp.argR;
3327 Bool is64 = i->ARM64in.Cmp.is64;
3328 switch (argR->tag) {
3329 case ARM64riA_I12:
3330 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3331 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3332 *p++ = X_2_6_2_12_5_5(
3333 is64 ? X11 : X01, X110001,
3334 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3335 argR->ARM64riA.I12.imm12, rN, rD);
3336 break;
3337 case ARM64riA_R: {
3338 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3339 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3340 UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3341 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3342 X01011000, rM, X000000, rN, rD);
3343 break;
3344 }
3345 default:
3346 goto bad;
3347 }
3348 goto done;
3349 }
3350 case ARM64in_Logic: {
3351 UInt rD = iregEnc(i->ARM64in.Logic.dst);
3352 UInt rN = iregEnc(i->ARM64in.Logic.argL);
3353 ARM64RIL* argR = i->ARM64in.Logic.argR;
3354 UInt opc = 0; /* invalid */
3355 vassert(rD < 31);
3356 vassert(rN < 31);
3357 switch (i->ARM64in.Logic.op) {
3358 case ARM64lo_OR: opc = X101; break;
3359 case ARM64lo_AND: opc = X100; break;
3360 case ARM64lo_XOR: opc = X110; break;
3361 default: break;
3362 }
3363 vassert(opc != 0);
3364 switch (argR->tag) {
3365 case ARM64riL_I13: {
3366 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3367 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3368 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3369 *p++ = X_3_6_1_6_6_5_5(
3370 opc, X100100, argR->ARM64riL.I13.bitN,
3371 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3372 rN, rD
3373 );
3374 break;
3375 }
3376 case ARM64riL_R: {
3377 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3378 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3379 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3380 UInt rM = iregEnc(argR->ARM64riL.R.reg);
3381 vassert(rM < 31);
3382 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3383 break;
3384 }
3385 default:
3386 goto bad;
3387 }
3388 goto done;
3389 }
3390 case ARM64in_Test: {
3391 UInt rD = 31; /* XZR, we are going to dump the result */
3392 UInt rN = iregEnc(i->ARM64in.Test.argL);
3393 ARM64RIL* argR = i->ARM64in.Test.argR;
3394 switch (argR->tag) {
3395 case ARM64riL_I13: {
3396 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3397 *p++ = X_3_6_1_6_6_5_5(
3398 X111, X100100, argR->ARM64riL.I13.bitN,
3399 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3400 rN, rD
3401 );
3402 break;
3403 }
3404 default:
3405 goto bad;
3406 }
3407 goto done;
3408 }
3409 case ARM64in_Shift: {
3410 UInt rD = iregEnc(i->ARM64in.Shift.dst);
3411 UInt rN = iregEnc(i->ARM64in.Shift.argL);
3412 ARM64RI6* argR = i->ARM64in.Shift.argR;
3413 vassert(rD < 31);
3414 vassert(rN < 31);
3415 switch (argR->tag) {
3416 case ARM64ri6_I6: {
3417             /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
3418 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3419 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3420 UInt sh = argR->ARM64ri6.I6.imm6;
3421 vassert(sh > 0 && sh < 64);
3422 switch (i->ARM64in.Shift.op) {
3423 case ARM64sh_SHL:
3424 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3425 1, 64-sh, 63-sh, rN, rD);
3426 break;
3427 case ARM64sh_SHR:
3428 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3429 break;
3430 case ARM64sh_SAR:
3431 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3432 break;
3433 default:
3434 vassert(0);
3435 }
3436 break;
3437 }
3438 case ARM64ri6_R: {
3439 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3440 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3441 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3442 UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3443 vassert(rM < 31);
3444 UInt subOpc = 0;
3445 switch (i->ARM64in.Shift.op) {
3446 case ARM64sh_SHL: subOpc = X001000; break;
3447 case ARM64sh_SHR: subOpc = X001001; break;
3448 case ARM64sh_SAR: subOpc = X001010; break;
3449 default: vassert(0);
3450 }
3451 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3452 break;
3453 }
3454 default:
3455 vassert(0);
3456 }
3457 goto done;
3458 }
3459 case ARM64in_Unary: {
3460 UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3461 UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3462 switch (i->ARM64in.Unary.op) {
3463 case ARM64un_CLZ:
3464 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3465 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3466 *p++ = X_3_8_5_6_5_5(X110,
3467 X11010110, X00000, X000100, rSrc, rDst);
3468 goto done;
3469 case ARM64un_NEG:
3470 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3471 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3472 *p++ = X_3_8_5_6_5_5(X110,
3473 X01011000, rSrc, X000000, X11111, rDst);
3474 goto done;
3475 case ARM64un_NOT: {
3476 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3477 *p++ = X_3_8_5_6_5_5(X101,
3478 X01010001, rSrc, X000000, X11111, rDst);
3479 goto done;
3480 }
3481 default:
3482 break;
3483 }
3484 goto bad;
3485 }
3486 case ARM64in_MovI: {
3487 /* We generate the "preferred form", ORR Xd, XZR, Xm
3488 101 01010 00 0 m 000000 11111 d
3489 */
3490 UInt instr = 0xAA0003E0;
3491 UInt d = iregEnc(i->ARM64in.MovI.dst);
3492 UInt m = iregEnc(i->ARM64in.MovI.src);
3493 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3494 goto done;
3495 }
3496 case ARM64in_Imm64: {
3497 p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3498 i->ARM64in.Imm64.imm64 );
3499 goto done;
3500 }
3501 case ARM64in_LdSt64: {
3502 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3503 iregEnc(i->ARM64in.LdSt64.rD),
3504 i->ARM64in.LdSt64.amode );
3505 goto done;
3506 }
3507 case ARM64in_LdSt32: {
3508 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3509 iregEnc(i->ARM64in.LdSt32.rD),
3510 i->ARM64in.LdSt32.amode );
3511 goto done;
3512 }
3513 case ARM64in_LdSt16: {
3514 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3515 iregEnc(i->ARM64in.LdSt16.rD),
3516 i->ARM64in.LdSt16.amode );
3517 goto done;
3518 }
3519 case ARM64in_LdSt8: {
3520 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3521 iregEnc(i->ARM64in.LdSt8.rD),
3522 i->ARM64in.LdSt8.amode );
3523 goto done;
3524 }
3525
3526 case ARM64in_XDirect: {
3527 /* NB: what goes on here has to be very closely coordinated
3528 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3529 /* We're generating chain-me requests here, so we need to be
3530 sure this is actually allowed -- no-redir translations
3531 can't use chain-me's. Hence: */
3532 vassert(disp_cp_chain_me_to_slowEP != NULL);
3533 vassert(disp_cp_chain_me_to_fastEP != NULL);
3534
3535 /* Use ptmp for backpatching conditional jumps. */
3536 UInt* ptmp = NULL;
3537
3538 /* First off, if this is conditional, create a conditional
3539 jump over the rest of it. Or at least, leave a space for
3540 it that we will shortly fill in. */
3541 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3542 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3543 ptmp = p;
3544 *p++ = 0;
3545 }
3546
3547 /* Update the guest PC. */
3548 /* imm64 x9, dstGA */
3549 /* str x9, amPC */
3550 p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3551 p = do_load_or_store64(p, False/*!isLoad*/,
3552 /*x*/9, i->ARM64in.XDirect.amPC);
3553
3554 /* --- FIRST PATCHABLE BYTE follows --- */
3555 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3556 calling to) backs up the return address, so as to find the
3557 address of the first patchable byte. So: don't change the
3558 number of instructions (5) below. */
3559 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3560          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
3561 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3562 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3563 /* blr x9 */
3564 const void* disp_cp_chain_me
3565 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3566 : disp_cp_chain_me_to_slowEP;
3567 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3568 *p++ = 0xD63F0120;
3569 /* --- END of PATCHABLE BYTES --- */
3570
3571 /* Fix up the conditional jump, if there was one. */
3572 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3573 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3574 vassert(delta > 0 && delta < 40);
3575 vassert((delta & 3) == 0);
3576 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3577 vassert(notCond <= 13); /* Neither AL nor NV */
3578 vassert(ptmp != NULL);
3579 delta = delta >> 2;
3580 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3581 }
3582 goto done;
3583 }
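      /* Illustrative overall shape of the code emitted for a conditional
         XDirect (a sketch; amPC is whatever amode was chosen for the
         guest PC slot):

            b.<!cond>  .+delta          ; the hole, patched last
            movz/movk  x9, <dstGA>      ; 1..4 insns
            str        x9, <amPC>       ; update the guest PC
            movz/movk  x9, <chain_me>   ; exactly 4 insns, patchable
            blr        x9
      */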
3584
3585 case ARM64in_XIndir: {
3586 // XIndir is more or less the same as XAssisted, except
3587 // we don't have a trc value to hand back, so there's no
3588          // write to x21
3589 /* Use ptmp for backpatching conditional jumps. */
3590 //UInt* ptmp = NULL;
3591
3592 /* First off, if this is conditional, create a conditional
3593 jump over the rest of it. Or at least, leave a space for
3594 it that we will shortly fill in. */
3595 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3596 vassert(0); //ATC
3597 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3598 //ZZ ptmp = p;
3599 //ZZ *p++ = 0;
3600 }
3601
3602 /* Update the guest PC. */
3603 /* str r-dstGA, amPC */
3604 p = do_load_or_store64(p, False/*!isLoad*/,
3605 iregEnc(i->ARM64in.XIndir.dstGA),
3606 i->ARM64in.XIndir.amPC);
3607
3608 /* imm64 x9, VG_(disp_cp_xindir) */
3609 /* br x9 */
3610 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3611 *p++ = 0xD61F0120; /* br x9 */
3612
3613 /* Fix up the conditional jump, if there was one. */
3614 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3615 vassert(0); //ATC
3616 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3617 //ZZ vassert(delta > 0 && delta < 40);
3618 //ZZ vassert((delta & 3) == 0);
3619 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3620 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
3621 //ZZ delta = (delta >> 2) - 2;
3622 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3623 }
3624 goto done;
3625 }
3626
3627 case ARM64in_XAssisted: {
3628 /* Use ptmp for backpatching conditional jumps. */
3629 UInt* ptmp = NULL;
3630
3631 /* First off, if this is conditional, create a conditional
3632 jump over the rest of it. Or at least, leave a space for
3633 it that we will shortly fill in. I think this can only
3634 ever happen when VEX is driven by the switchbacker. */
3635 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3636             vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
3637 ptmp = p;
3638 *p++ = 0;
3639 }
3640
3641 /* Update the guest PC. */
3642 /* str r-dstGA, amPC */
3643 p = do_load_or_store64(p, False/*!isLoad*/,
3644 iregEnc(i->ARM64in.XAssisted.dstGA),
3645 i->ARM64in.XAssisted.amPC);
3646
3647          /* imm64 x21, $magic_number */
3648 UInt trcval = 0;
3649 switch (i->ARM64in.XAssisted.jk) {
3650 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3651 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3652 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3653 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3654 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3655 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3656 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3657 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3658 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
3659 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3660 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3661 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3662 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3663 /* We don't expect to see the following being assisted. */
3664 //case Ijk_Ret:
3665 //case Ijk_Call:
3666 /* fallthrough */
3667 default:
3668 ppIRJumpKind(i->ARM64in.XAssisted.jk);
3669 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3670 "unexpected jump kind");
3671 }
3672 vassert(trcval != 0);
3673 p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
3674
3675 /* imm64 x9, VG_(disp_cp_xassisted) */
3676 /* br x9 */
3677 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
3678 *p++ = 0xD61F0120; /* br x9 */
3679
3680 /* Fix up the conditional jump, if there was one. */
3681 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3682 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3683 vassert(delta > 0 && delta < 40);
3684 vassert((delta & 3) == 0);
3685             UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
3686 vassert(notCond <= 13); /* Neither AL nor NV */
3687 vassert(ptmp != NULL);
3688 delta = delta >> 2;
3689 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3690 }
3691 goto done;
3692 }
3693
3694 case ARM64in_CSel: {
3695 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3696 UInt dd = iregEnc(i->ARM64in.CSel.dst);
3697 UInt nn = iregEnc(i->ARM64in.CSel.argL);
3698 UInt mm = iregEnc(i->ARM64in.CSel.argR);
3699 UInt cond = (UInt)i->ARM64in.CSel.cond;
3700 vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
3701 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
3702 goto done;
3703 }
3704
3705 case ARM64in_Call: {
3706 /* We'll use x9 as a scratch register to put the target
3707 address in. */
3708 if (i->ARM64in.Call.cond != ARM64cc_AL
3709 && i->ARM64in.Call.rloc.pri != RLPri_None) {
3710 /* The call might not happen (it isn't unconditional) and
3711 it returns a result. In this case we will need to
3712 generate a control flow diamond to put 0x555..555 in
3713 the return register(s) in the case where the call
3714 doesn't happen. If this ever becomes necessary, maybe
3715 copy code from the 32-bit ARM equivalent. Until that
3716 day, just give up. */
3717 goto bad;
3718 }
3719
3720 UInt* ptmp = NULL;
3721 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3722 /* Create a hole to put a conditional branch in. We'll
3723 patch it once we know the branch length. */
3724 ptmp = p;
3725 *p++ = 0;
3726 }
3727
3728 // x9 = &target
3729 p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
3730 // blr x9
3731 *p++ = 0xD63F0120;
3732
3733 // Patch the hole if necessary
3734 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3735 ULong dist = (ULong)(p - ptmp);
3736 /* imm64_to_ireg produces between 1 and 4 insns, and
3737 then there's the BLR itself. Hence: */
3738 vassert(dist >= 2 && dist <= 5);
3739 vassert(ptmp != NULL);
3740 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3741 *ptmp = X_8_19_1_4(X01010100, dist, 0,
3742 1 ^ (UInt)i->ARM64in.Call.cond);
3743 } else {
3744 vassert(ptmp == NULL);
3745 }
3746
3747 goto done;
3748 }
3749
3750 case ARM64in_AddToSP: {
3751 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
3752 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
3753 */
3754 Int simm12 = i->ARM64in.AddToSP.simm;
3755 vassert(-4096 < simm12 && simm12 < 4096);
3756 vassert(0 == (simm12 & 0xF));
3757 if (simm12 >= 0) {
3758 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
3759 } else {
3760 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
3761 }
3762 goto done;
3763 }
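      /* Illustrative: ARM64in_AddToSP with simm = -48 takes the SUB branch
         and emits X_2_6_2_12_5_5(X11, X010001, X00, 48, X11111, X11111),
         i.e. SUB sp, sp, #48 (0xD100C3FF). */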
3764
3765 case ARM64in_FromSP: {
3766 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
3767 UInt dd = iregEnc(i->ARM64in.FromSP.dst);
3768 vassert(dd < 31);
3769 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
3770 goto done;
3771 }
3772
3773 case ARM64in_Mul: {
3774 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
3775 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
3776 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
3777 */
3778 UInt dd = iregEnc(i->ARM64in.Mul.dst);
3779 UInt nn = iregEnc(i->ARM64in.Mul.argL);
3780 UInt mm = iregEnc(i->ARM64in.Mul.argR);
3781 vassert(dd < 31 && nn < 31 && mm < 31);
3782 switch (i->ARM64in.Mul.op) {
3783 case ARM64mul_ZX:
3784 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
3785 goto done;
3786 case ARM64mul_SX:
3787 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
3788 goto done;
3789 case ARM64mul_PLAIN:
3790 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
3791 goto done;
3792 default:
3793 vassert(0);
3794 }
3795 goto bad;
3796 }
3797 case ARM64in_LdrEX: {
3798 /* 085F7C82 ldxrb w2, [x4]
3799 485F7C82 ldxrh w2, [x4]
3800 885F7C82 ldxr w2, [x4]
3801 C85F7C82 ldxr x2, [x4]
3802 */
3803 switch (i->ARM64in.LdrEX.szB) {
3804 case 1: *p++ = 0x085F7C82; goto done;
3805 case 2: *p++ = 0x485F7C82; goto done;
3806 case 4: *p++ = 0x885F7C82; goto done;
3807 case 8: *p++ = 0xC85F7C82; goto done;
3808 default: break;
3809 }
3810 goto bad;
3811 }
3812 case ARM64in_StrEX: {
3813 /* 08007C82 stxrb w0, w2, [x4]
3814 48007C82 stxrh w0, w2, [x4]
3815 88007C82 stxr w0, w2, [x4]
3816 C8007C82 stxr w0, x2, [x4]
3817 */
3818 switch (i->ARM64in.StrEX.szB) {
3819 case 1: *p++ = 0x08007C82; goto done;
3820 case 2: *p++ = 0x48007C82; goto done;
3821 case 4: *p++ = 0x88007C82; goto done;
3822 case 8: *p++ = 0xC8007C82; goto done;
3823 default: break;
3824 }
3825 goto bad;
3826 }
3827 case ARM64in_CAS: {
3828 /* This isn't simple. For an explanation see the comment in
3829             host_arm64_defs.h on the definition of ARM64Instr case
3830 CAS. */
3831 /* Generate:
3832 -- one of:
3833 mov x8, x5 // AA0503E8
3834 and x8, x5, #0xFFFFFFFF // 92407CA8
3835 and x8, x5, #0xFFFF // 92403CA8
3836 and x8, x5, #0xFF // 92401CA8
3837
3838 -- one of:
3839 ldxr x1, [x3] // C85F7C61
3840 ldxr w1, [x3] // 885F7C61
3841 ldxrh w1, [x3] // 485F7C61
3842 ldxrb w1, [x3] // 085F7C61
3843
3844 -- always:
3845 cmp x1, x8 // EB08003F
3846 bne out // 54000061
3847
3848 -- one of:
3849 stxr w1, x7, [x3] // C8017C67
3850 stxr w1, w7, [x3] // 88017C67
3851 stxrh w1, w7, [x3] // 48017C67
3852 stxrb w1, w7, [x3] // 08017C67
3853
3854 -- always:
3855 eor x1, x5, x1 // CA0100A1
3856 out:
3857 */
3858 switch (i->ARM64in.CAS.szB) {
3859 case 8: *p++ = 0xAA0503E8; break;
3860 case 4: *p++ = 0x92407CA8; break;
3861 case 2: *p++ = 0x92403CA8; break;
3862 case 1: *p++ = 0x92401CA8; break;
3863 default: vassert(0);
3864 }
3865 switch (i->ARM64in.CAS.szB) {
3866 case 8: *p++ = 0xC85F7C61; break;
3867 case 4: *p++ = 0x885F7C61; break;
3868 case 2: *p++ = 0x485F7C61; break;
3869 case 1: *p++ = 0x085F7C61; break;
3870 }
3871 *p++ = 0xEB08003F;
3872 *p++ = 0x54000061;
3873 switch (i->ARM64in.CAS.szB) {
3874 case 8: *p++ = 0xC8017C67; break;
3875 case 4: *p++ = 0x88017C67; break;
3876 case 2: *p++ = 0x48017C67; break;
3877 case 1: *p++ = 0x08017C67; break;
3878 }
3879 *p++ = 0xCA0100A1;
3880 goto done;
3881 }
3882 case ARM64in_MFence: {
3883 *p++ = 0xD5033F9F; /* DSB sy */
3884 *p++ = 0xD5033FBF; /* DMB sy */
3885 *p++ = 0xD5033FDF; /* ISB */
3886 goto done;
3887 }
3888 case ARM64in_ClrEX: {
3889 *p++ = 0xD5033F5F; /* clrex #15 */
3890 goto done;
3891 }
3892 case ARM64in_VLdStH: {
3893 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
3894 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
3895 */
3896 UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
3897 UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
3898 UInt uimm12 = i->ARM64in.VLdStH.uimm12;
3899 Bool isLD = i->ARM64in.VLdStH.isLoad;
3900 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
3901 uimm12 >>= 1;
3902 vassert(uimm12 < (1<<12));
3903 vassert(hD < 32);
3904 vassert(rN < 31);
3905 *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
3906 uimm12, rN, hD);
3907 goto done;
3908 }
3909 case ARM64in_VLdStS: {
3910 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
3911 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
3912 */
3913 UInt sD = dregEnc(i->ARM64in.VLdStS.sD);
3914 UInt rN = iregEnc(i->ARM64in.VLdStS.rN);
3915 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
3916 Bool isLD = i->ARM64in.VLdStS.isLoad;
3917 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
3918 uimm12 >>= 2;
3919 vassert(uimm12 < (1<<12));
3920 vassert(sD < 32);
3921 vassert(rN < 31);
3922 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
3923 uimm12, rN, sD);
3924 goto done;
3925 }
3926 case ARM64in_VLdStD: {
3927 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
3928 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
3929 */
3930 UInt dD = dregEnc(i->ARM64in.VLdStD.dD);
3931 UInt rN = iregEnc(i->ARM64in.VLdStD.rN);
3932 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
3933 Bool isLD = i->ARM64in.VLdStD.isLoad;
3934 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
3935 uimm12 >>= 3;
3936 vassert(uimm12 < (1<<12));
3937 vassert(dD < 32);
3938 vassert(rN < 31);
3939 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
3940 uimm12, rN, dD);
3941 goto done;
3942 }
3943 case ARM64in_VLdStQ: {
3944 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
3945 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
3946 */
3947 UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
3948 UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
3949 vassert(rQ < 32);
3950 vassert(rN < 31);
3951 if (i->ARM64in.VLdStQ.isLoad) {
3952 *p++ = 0x4C407C00 | (rN << 5) | rQ;
3953 } else {
3954 *p++ = 0x4C007C00 | (rN << 5) | rQ;
3955 }
3956 goto done;
3957 }
3958 case ARM64in_VCvtI2F: {
3959 /* 31 28 23 21 20 18 15 9 4
3960 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
3961 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
3962 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
3963 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
3964 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
3965 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
3966 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
3967 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
3968 */
3969 UInt rN = iregEnc(i->ARM64in.VCvtI2F.rS);
3970 UInt rD = dregEnc(i->ARM64in.VCvtI2F.rD);
3971 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
3972 /* Just handle cases as they show up. */
3973 switch (how) {
3974 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
3975 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
3976 break;
3977 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
3978 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
3979 break;
3980 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
3981 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
3982 break;
3983 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
3984 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
3985 break;
3986 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
3987 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
3988 break;
3989 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
3990 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
3991 break;
3992 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
3993 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
3994 break;
3995 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
3996 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
3997 break;
3998 default:
3999 goto bad; //ATC
4000 }
4001 goto done;
4002 }
4003 case ARM64in_VCvtF2I: {
4004 /* 30 23 20 18 15 9 4
4005 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4006 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4007 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4008 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4009 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4010
4011 Rd is Xd when sf==1, Wd when sf==0
4012 Fn is Dn when x==1, Sn when x==0
4013 20:19 carry the rounding mode, using the same encoding as FPCR
4014 */
4015 UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
4016 UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
4017 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
4018 UChar armRM = i->ARM64in.VCvtF2I.armRM;
4019 /* Just handle cases as they show up. */
4020 switch (how) {
4021 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
4022 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
4023 X000000, rN, rD);
4024 break;
4025 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
4026 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
4027 X000000, rN, rD);
4028 break;
4029 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
4030 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
4031 X000000, rN, rD);
4032 break;
4033 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
4034 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
4035 X000000, rN, rD);
4036 break;
4037 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
4038 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
4039 X000000, rN, rD);
4040 break;
4041 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
4042 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
4043 X000000, rN, rD);
4044 break;
4045 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
4046 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
4047 X000000, rN, rD);
4048 break;
4049 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
4050 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
4051 X000000, rN, rD);
4052 break;
4053 default:
4054 goto bad; //ATC
4055 }
4056 goto done;
4057 }
4058 case ARM64in_VCvtSD: {
4059 /* 31 23 21 16 14 9 4
4060 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4061 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4062 Rounding, when dst is smaller than src, is per the FPCR.
4063 */
4064 UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
4065 UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
4066 if (i->ARM64in.VCvtSD.sToD) {
4067 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
4068 } else {
4069 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
4070 }
4071 goto done;
4072 }
4073 case ARM64in_VCvtHS: {
4074 /* 31 23 21 16 14 9 4
4075 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
4076 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
4077 Rounding, when dst is smaller than src, is per the FPCR.
4078 */
4079 UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
4080 UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
4081 if (i->ARM64in.VCvtHS.hToS) {
4082 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
4083 } else {
4084 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
4085 }
4086 goto done;
4087 }
4088 case ARM64in_VCvtHD: {
4089 /* 31 23 21 16 14 9 4
4090 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4091 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4092 Rounding, when dst is smaller than src, is per the FPCR.
4093 */
4094 UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4095 UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4096 if (i->ARM64in.VCvtHD.hToD) {
4097 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4098 } else {
4099 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4100 }
4101 goto done;
4102 }
4103 case ARM64in_VUnaryD: {
4104 /* 31 23 21 16 14 9 4
4105 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4106 ------------------- 0,1 --------- FABS ------
4107 ------------------- 1,0 --------- FNEG ------
4108 ------------------- 1,1 --------- FSQRT -----
4109 */
4110 UInt dD = dregEnc(i->ARM64in.VUnaryD.dst);
4111 UInt dN = dregEnc(i->ARM64in.VUnaryD.src);
4112 UInt b16 = 2; /* impossible */
4113 UInt b15 = 2; /* impossible */
4114 switch (i->ARM64in.VUnaryD.op) {
4115 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4116 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4117 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4118 default: break;
4119 }
4120 if (b16 < 2 && b15 < 2) {
4121 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4122 (b15 << 5) | X10000, dN, dD);
4123 goto done;
4124 }
4125 /*
4126          000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dn (round per FPCR)
4127 */
4128 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4129 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4130 goto done;
4131 }
4132 /*
4133          010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dn
4134 */
4135 if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4136 *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4137 goto done;
4138 }
4139 goto bad;
4140 }
4141 case ARM64in_VUnaryS: {
4142 /* 31 23 21 16 14 9 4
4143 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4144 ------------------- 0,1 --------- FABS ------
4145 ------------------- 1,0 --------- FNEG ------
4146 ------------------- 1,1 --------- FSQRT -----
4147 */
4148 UInt sD = dregEnc(i->ARM64in.VUnaryS.dst);
4149 UInt sN = dregEnc(i->ARM64in.VUnaryS.src);
4150 UInt b16 = 2; /* impossible */
4151 UInt b15 = 2; /* impossible */
4152 switch (i->ARM64in.VUnaryS.op) {
4153 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4154 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4155 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4156 default: break;
4157 }
4158 if (b16 < 2 && b15 < 2) {
4159 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4160 (b15 << 5) | X10000, sN, sD);
4161 goto done;
4162 }
4163 /*
4164          000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sn (round per FPCR)
4165 */
4166 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4167 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4168 goto done;
4169 }
4170 /*
4171          010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sn
4172 */
4173 if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4174 *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4175 goto done;
4176 }
4177 goto bad;
4178 }
4179 case ARM64in_VBinD: {
4180 /* 31 23 20 15 11 9 4
4181 ---------------- 0000 ------ FMUL --------
4182 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4183 ---------------- 0010 ------ FADD --------
4184 ---------------- 0011 ------ FSUB --------
4185 */
4186 UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4187 UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4188 UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4189 UInt b1512 = 16; /* impossible */
4190 switch (i->ARM64in.VBinD.op) {
4191 case ARM64fpb_DIV: b1512 = X0001; break;
4192 case ARM64fpb_MUL: b1512 = X0000; break;
4193 case ARM64fpb_SUB: b1512 = X0011; break;
4194 case ARM64fpb_ADD: b1512 = X0010; break;
4195 default: goto bad;
4196 }
4197 vassert(b1512 < 16);
4198 *p++
4199 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4200 goto done;
4201 }
4202 case ARM64in_VBinS: {
4203 /* 31 23 20 15 11 9 4
4204 ---------------- 0000 ------ FMUL --------
4205          000 11110 001 m 0001 10 n d   FDIV Sd,Sn,Sm
4206 ---------------- 0010 ------ FADD --------
4207 ---------------- 0011 ------ FSUB --------
4208 */
4209 UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4210 UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4211 UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4212 UInt b1512 = 16; /* impossible */
4213 switch (i->ARM64in.VBinS.op) {
4214 case ARM64fpb_DIV: b1512 = X0001; break;
4215 case ARM64fpb_MUL: b1512 = X0000; break;
4216 case ARM64fpb_SUB: b1512 = X0011; break;
4217 case ARM64fpb_ADD: b1512 = X0010; break;
4218 default: goto bad;
4219 }
4220 vassert(b1512 < 16);
4221 *p++
4222 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4223 goto done;
4224 }
4225 case ARM64in_VCmpD: {
4226 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4227 UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4228 UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4229 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4230 goto done;
4231 }
4232 case ARM64in_VCmpS: {
4233 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4234 UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4235 UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4236 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4237 goto done;
4238 }
4239 case ARM64in_VFCSel: {
4240 /* 31 23 21 20 15 11 9 5
4241 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4242 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4243 */
4244 Bool isD = i->ARM64in.VFCSel.isD;
4245 UInt dd = dregEnc(i->ARM64in.VFCSel.dst);
4246 UInt nn = dregEnc(i->ARM64in.VFCSel.argL);
4247 UInt mm = dregEnc(i->ARM64in.VFCSel.argR);
4248 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4249 vassert(cond < 16);
4250 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4251 mm, (cond << 2) | X000011, nn, dd);
4252 goto done;
4253 }
4254 case ARM64in_FPCR: {
4255 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4256 UInt iReg = iregEnc(i->ARM64in.FPCR.iReg);
4257 if (toFPCR) {
4258 /* 0xD51B44 000 Rt MSR fpcr, rT */
4259 *p++ = 0xD51B4400 | (iReg & 0x1F);
4260 goto done;
4261 }
4262 goto bad; // FPCR -> iReg case currently ATC
4263 }
4264 case ARM64in_FPSR: {
4265 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4266 UInt iReg = iregEnc(i->ARM64in.FPSR.iReg);
4267 if (toFPSR) {
4268 /* 0xD51B44 001 Rt MSR fpsr, rT */
4269 *p++ = 0xD51B4420 | (iReg & 0x1F);
4270 } else {
4271 /* 0xD53B44 001 Rt MRS rT, fpsr */
4272 *p++ = 0xD53B4420 | (iReg & 0x1F);
4273 }
4274 goto done;
4275 }
4276 case ARM64in_VBinV: {
4277 /* 31 23 20 15 9 4
4278 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4279 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4280 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4281 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4282
4283 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4284 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4285 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4286 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4287
4288 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4289 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4290 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4291
4292 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4293 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4294 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4295 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4296
4297 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4298 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4299 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4300 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4301
4302 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4303 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4304 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4305 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4306
4307 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4308 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4309 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4310
4311 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4312 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4313 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4314
4315 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4316 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4317 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4318
4319 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4320 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4321 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4322
4323 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4324 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4325 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4326
4327 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4328 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4329 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4330 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4331
4332 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4333 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4334 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4335 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4336
4337 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4338 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4339 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4340 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4341
4342 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4343 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4344
4345 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4346 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4347
4348 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4349 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4350
4351 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4352
4353 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4354 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4355 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4356 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4357
4358 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4359 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4360 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4361 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4362
4363 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4364 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4365          010 01110 00 0 m 001110 n d   ZIP1 Vd.16b, Vn.16b, Vm.16b
4366
4367 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4368 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4369          010 01110 00 0 m 011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
4370
4371 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4372
4373 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4374
4375 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4376 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4377 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4378
4379 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4380 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4381 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4382
4383 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4384 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4385 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4386 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4387
4388 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4389 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4390 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4391 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4392
4393 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4394 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4395 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4396 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4397
4398 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4399 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4400 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4401 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4402
4403 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4404 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4405
4406 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4407 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4408 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4409          011 01110 01 1 m 101101 n d   SQRDMULH Vd.8h, Vn.8h, Vm.8h
4410
4411 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4412 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4413 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4414          011 01110 sz 1 m 010111 n d   UQRSHL@sz Vd, Vn, Vm
4415
4416 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4417 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4418 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4419 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4420
4421 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4422 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4423 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4424 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4425 */
4426 UInt vD = qregEnc(i->ARM64in.VBinV.dst);
4427 UInt vN = qregEnc(i->ARM64in.VBinV.argL);
4428 UInt vM = qregEnc(i->ARM64in.VBinV.argR);
4429 switch (i->ARM64in.VBinV.op) {
4430 case ARM64vecb_ADD64x2:
4431 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4432 break;
4433 case ARM64vecb_ADD32x4:
4434 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4435 break;
4436 case ARM64vecb_ADD16x8:
4437 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4438 break;
4439 case ARM64vecb_ADD8x16:
4440 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4441 break;
4442 case ARM64vecb_SUB64x2:
4443 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4444 break;
4445 case ARM64vecb_SUB32x4:
4446 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4447 break;
4448 case ARM64vecb_SUB16x8:
4449 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4450 break;
4451 case ARM64vecb_SUB8x16:
4452 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4453 break;
4454 case ARM64vecb_MUL32x4:
4455 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4456 break;
4457 case ARM64vecb_MUL16x8:
4458 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4459 break;
4460 case ARM64vecb_MUL8x16:
4461 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4462 break;
4463 case ARM64vecb_FADD64x2:
4464 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4465 break;
4466 case ARM64vecb_FADD32x4:
4467 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4468 break;
4469 case ARM64vecb_FSUB64x2:
4470 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4471 break;
4472 case ARM64vecb_FSUB32x4:
4473 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4474 break;
4475 case ARM64vecb_FMUL64x2:
4476 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4477 break;
4478 case ARM64vecb_FMUL32x4:
4479 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4480 break;
4481 case ARM64vecb_FDIV64x2:
4482 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4483 break;
4484 case ARM64vecb_FDIV32x4:
4485 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4486 break;
4487
4488 case ARM64vecb_FMAX64x2:
4489 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4490 break;
4491 case ARM64vecb_FMAX32x4:
4492 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4493 break;
4494 case ARM64vecb_FMIN64x2:
4495 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4496 break;
4497 case ARM64vecb_FMIN32x4:
4498 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4499 break;
4500
4501 case ARM64vecb_UMAX32x4:
4502 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4503 break;
4504 case ARM64vecb_UMAX16x8:
4505 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4506 break;
4507 case ARM64vecb_UMAX8x16:
4508 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4509 break;
4510
4511 case ARM64vecb_UMIN32x4:
4512 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4513 break;
4514 case ARM64vecb_UMIN16x8:
4515 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4516 break;
4517 case ARM64vecb_UMIN8x16:
4518 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4519 break;
4520
4521 case ARM64vecb_SMAX32x4:
4522 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4523 break;
4524 case ARM64vecb_SMAX16x8:
4525 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4526 break;
4527 case ARM64vecb_SMAX8x16:
4528 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4529 break;
4530
4531 case ARM64vecb_SMIN32x4:
4532 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4533 break;
4534 case ARM64vecb_SMIN16x8:
4535 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4536 break;
4537 case ARM64vecb_SMIN8x16:
4538 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4539 break;
4540
4541 case ARM64vecb_AND:
4542 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4543 break;
4544 case ARM64vecb_ORR:
4545 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4546 break;
4547 case ARM64vecb_XOR:
4548 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4549 break;
4550
4551 case ARM64vecb_CMEQ64x2:
4552 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4553 break;
4554 case ARM64vecb_CMEQ32x4:
4555 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4556 break;
4557 case ARM64vecb_CMEQ16x8:
4558 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4559 break;
4560 case ARM64vecb_CMEQ8x16:
4561 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4562 break;
4563
4564 case ARM64vecb_CMHI64x2:
4565 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
4566 break;
4567 case ARM64vecb_CMHI32x4:
4568 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
4569 break;
4570 case ARM64vecb_CMHI16x8:
4571 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
4572 break;
4573 case ARM64vecb_CMHI8x16:
4574 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
4575 break;
4576
4577 case ARM64vecb_CMGT64x2:
4578 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
4579 break;
4580 case ARM64vecb_CMGT32x4:
4581 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
4582 break;
4583 case ARM64vecb_CMGT16x8:
4584 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
4585 break;
4586 case ARM64vecb_CMGT8x16:
4587 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
4588 break;
4589
4590 case ARM64vecb_FCMEQ64x2:
4591 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4592 break;
4593 case ARM64vecb_FCMEQ32x4:
4594 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4595 break;
4596
4597 case ARM64vecb_FCMGE64x2:
4598 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4599 break;
4600 case ARM64vecb_FCMGE32x4:
4601 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4602 break;
4603
4604 case ARM64vecb_FCMGT64x2:
4605 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4606 break;
4607 case ARM64vecb_FCMGT32x4:
4608 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4609 break;
4610
4611 case ARM64vecb_TBL1:
4612 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4613 break;
4614
4615 case ARM64vecb_UZP164x2:
4616 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4617 break;
4618 case ARM64vecb_UZP132x4:
4619 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4620 break;
4621 case ARM64vecb_UZP116x8:
4622 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4623 break;
4624 case ARM64vecb_UZP18x16:
4625 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4626 break;
4627
4628 case ARM64vecb_UZP264x2:
4629 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4630 break;
4631 case ARM64vecb_UZP232x4:
4632 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4633 break;
4634 case ARM64vecb_UZP216x8:
4635 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4636 break;
4637 case ARM64vecb_UZP28x16:
4638 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4639 break;
4640
4641 case ARM64vecb_ZIP132x4:
4642 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4643 break;
4644 case ARM64vecb_ZIP116x8:
4645 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4646 break;
4647 case ARM64vecb_ZIP18x16:
4648 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4649 break;
4650
4651 case ARM64vecb_ZIP232x4:
4652 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4653 break;
4654 case ARM64vecb_ZIP216x8:
4655 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4656 break;
4657 case ARM64vecb_ZIP28x16:
4658 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
4659 break;
4660
4661 case ARM64vecb_PMUL8x16:
4662 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4663 break;
4664
4665 case ARM64vecb_PMULL8x8:
4666 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4667 break;
4668
4669 case ARM64vecb_UMULL2DSS:
4670 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4671 break;
4672 case ARM64vecb_UMULL4SHH:
4673 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4674 break;
4675 case ARM64vecb_UMULL8HBB:
4676 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4677 break;
4678
4679 case ARM64vecb_SMULL2DSS:
4680 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4681 break;
4682 case ARM64vecb_SMULL4SHH:
4683 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4684 break;
4685 case ARM64vecb_SMULL8HBB:
4686 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4687 break;
4688
4689 case ARM64vecb_SQADD64x2:
4690 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
4691 break;
4692 case ARM64vecb_SQADD32x4:
4693 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
4694 break;
4695 case ARM64vecb_SQADD16x8:
4696 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
4697 break;
4698 case ARM64vecb_SQADD8x16:
4699 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
4700 break;
4701
4702 case ARM64vecb_UQADD64x2:
4703 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
4704 break;
4705 case ARM64vecb_UQADD32x4:
4706 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
4707 break;
4708 case ARM64vecb_UQADD16x8:
4709 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
4710 break;
4711 case ARM64vecb_UQADD8x16:
4712 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
4713 break;
4714
4715 case ARM64vecb_SQSUB64x2:
4716 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
4717 break;
4718 case ARM64vecb_SQSUB32x4:
4719 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
4720 break;
4721 case ARM64vecb_SQSUB16x8:
4722 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
4723 break;
4724 case ARM64vecb_SQSUB8x16:
4725 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
4726 break;
4727
4728 case ARM64vecb_UQSUB64x2:
4729 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
4730 break;
4731 case ARM64vecb_UQSUB32x4:
4732 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
4733 break;
4734 case ARM64vecb_UQSUB16x8:
4735 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
4736 break;
4737 case ARM64vecb_UQSUB8x16:
4738 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
4739 break;
4740
4741 case ARM64vecb_SQDMULL2DSS:
4742 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
4743 break;
4744 case ARM64vecb_SQDMULL4SHH:
4745 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
4746 break;
4747
4748 case ARM64vecb_SQDMULH32x4:
4749 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
4750 break;
4751 case ARM64vecb_SQDMULH16x8:
4752 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
4753 break;
4754 case ARM64vecb_SQRDMULH32x4:
4755 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
4756 break;
4757 case ARM64vecb_SQRDMULH16x8:
4758 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
4759 break;
4760
4761 case ARM64vecb_SQSHL64x2:
4762 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
4763 break;
4764 case ARM64vecb_SQSHL32x4:
4765 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
4766 break;
4767 case ARM64vecb_SQSHL16x8:
4768 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
4769 break;
4770 case ARM64vecb_SQSHL8x16:
4771 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
4772 break;
4773
4774 case ARM64vecb_SQRSHL64x2:
4775 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
4776 break;
4777 case ARM64vecb_SQRSHL32x4:
4778 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
4779 break;
4780 case ARM64vecb_SQRSHL16x8:
4781 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
4782 break;
4783 case ARM64vecb_SQRSHL8x16:
4784 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
4785 break;
4786
4787 case ARM64vecb_UQSHL64x2:
4788 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
4789 break;
4790 case ARM64vecb_UQSHL32x4:
4791 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
4792 break;
4793 case ARM64vecb_UQSHL16x8:
4794 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
4795 break;
4796 case ARM64vecb_UQSHL8x16:
4797 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
4798 break;
4799
4800 case ARM64vecb_UQRSHL64x2:
4801 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
4802 break;
4803 case ARM64vecb_UQRSHL32x4:
4804 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
4805 break;
4806 case ARM64vecb_UQRSHL16x8:
4807 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
4808 break;
4809 case ARM64vecb_UQRSHL8x16:
4810 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
4811 break;
4812
4813 case ARM64vecb_SSHL64x2:
4814 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
4815 break;
4816 case ARM64vecb_SSHL32x4:
4817 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
4818 break;
4819 case ARM64vecb_SSHL16x8:
4820 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
4821 break;
4822 case ARM64vecb_SSHL8x16:
4823 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
4824 break;
4825
4826 case ARM64vecb_SRSHL64x2:
4827 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
4828 break;
4829 case ARM64vecb_SRSHL32x4:
4830 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
4831 break;
4832 case ARM64vecb_SRSHL16x8:
4833 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
4834 break;
4835 case ARM64vecb_SRSHL8x16:
4836 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
4837 break;
4838
4839 case ARM64vecb_USHL64x2:
4840 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
4841 break;
4842 case ARM64vecb_USHL32x4:
4843 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
4844 break;
4845 case ARM64vecb_USHL16x8:
4846 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
4847 break;
4848 case ARM64vecb_USHL8x16:
4849 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
4850 break;
4851
4852 case ARM64vecb_URSHL64x2:
4853 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
4854 break;
4855 case ARM64vecb_URSHL32x4:
4856 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
4857 break;
4858 case ARM64vecb_URSHL16x8:
4859 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
4860 break;
4861 case ARM64vecb_URSHL8x16:
4862 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
4863 break;
4864
4865 case ARM64vecb_FRECPS64x2:
4866 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
4867 break;
4868 case ARM64vecb_FRECPS32x4:
4869 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
4870 break;
4871 case ARM64vecb_FRSQRTS64x2:
4872 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
4873 break;
4874 case ARM64vecb_FRSQRTS32x4:
4875 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
4876 break;
4877
4878 default:
4879 goto bad;
4880 }
4881 goto done;
4882 }
4883 case ARM64in_VModifyV: {
4884 /* 31 23 20 15 9 4
4885 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
4886 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
4887 */
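         /* Worked example (illustrative only; assumes X_3_8_5_6_5_5 simply
            packs its six arguments into fields of 3,8,5,6,5,5 bits, MSB
            first): SUQADD Vd.2d, Vn.2d with d=0, n=1 is
            010 01110111 00000 001110 00001 00000, i.e. 0x4EE03820. */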
4888 UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
4889 UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
4890 switch (i->ARM64in.VModifyV.op) {
4891 case ARM64vecmo_SUQADD64x2:
4892 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
4893 break;
4894 case ARM64vecmo_SUQADD32x4:
4895 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
4896 break;
4897 case ARM64vecmo_SUQADD16x8:
4898 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
4899 break;
4900 case ARM64vecmo_SUQADD8x16:
4901 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
4902 break;
4903 case ARM64vecmo_USQADD64x2:
4904 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
4905 break;
4906 case ARM64vecmo_USQADD32x4:
4907 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
4908 break;
4909 case ARM64vecmo_USQADD16x8:
4910 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
4911 break;
4912 case ARM64vecmo_USQADD8x16:
4913 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
4914 break;
4915 default:
4916 goto bad;
4917 }
4918 goto done;
4919 }
4920 case ARM64in_VUnaryV: {
4921 /* 31 23 20 15 9 4
4922 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
4923 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
4924 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
4925 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
4926 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
4927
4928 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
4929 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
4930 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
4931 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
4932
4933 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
4934 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
4935 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
4936
4937 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
4938 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
4939 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
4940
4941 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
4942
4943 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
4944 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
4945 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
4946 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
4947
4948 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
4949 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
4950 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
4951
4952 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
4953 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
4954
4955 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
4956 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
4957
4958 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
4959 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
4960
4961 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
4962 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
4963 */
4964 UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
4965 UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
4966 switch (i->ARM64in.VUnaryV.op) {
4967 case ARM64vecu_FABS64x2:
4968 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
4969 break;
4970 case ARM64vecu_FABS32x4:
4971 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
4972 break;
4973 case ARM64vecu_FNEG64x2:
4974 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
4975 break;
4976 case ARM64vecu_FNEG32x4:
4977 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
4978 break;
4979 case ARM64vecu_NOT:
4980 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
4981 break;
4982 case ARM64vecu_ABS64x2:
4983 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
4984 break;
4985 case ARM64vecu_ABS32x4:
4986 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
4987 break;
4988 case ARM64vecu_ABS16x8:
4989 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
4990 break;
4991 case ARM64vecu_ABS8x16:
4992 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
4993 break;
4994 case ARM64vecu_CLS32x4:
4995 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
4996 break;
4997 case ARM64vecu_CLS16x8:
4998 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
4999 break;
5000 case ARM64vecu_CLS8x16:
5001 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
5002 break;
5003 case ARM64vecu_CLZ32x4:
5004 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
5005 break;
5006 case ARM64vecu_CLZ16x8:
5007 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
5008 break;
5009 case ARM64vecu_CLZ8x16:
5010 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
5011 break;
5012 case ARM64vecu_CNT8x16:
5013 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
5014 break;
5015 case ARM64vecu_RBIT:
5016 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
5017 break;
5018 case ARM64vecu_REV1616B:
5019 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
5020 break;
5021 case ARM64vecu_REV3216B:
5022 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
5023 break;
5024 case ARM64vecu_REV328H:
5025 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
5026 break;
5027 case ARM64vecu_REV6416B:
5028 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
5029 break;
5030 case ARM64vecu_REV648H:
5031 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
5032 break;
5033 case ARM64vecu_REV644S:
5034 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
5035 break;
5036 case ARM64vecu_URECPE32x4:
5037 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
5038 break;
5039 case ARM64vecu_URSQRTE32x4:
5040 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
5041 break;
5042 case ARM64vecu_FRECPE64x2:
5043 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
5044 break;
5045 case ARM64vecu_FRECPE32x4:
5046 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
5047 break;
5048 case ARM64vecu_FRSQRTE64x2:
5049 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
5050 break;
5051 case ARM64vecu_FRSQRTE32x4:
5052 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
5053 break;
5054 case ARM64vecu_FSQRT64x2:
5055 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
5056 break;
5057 case ARM64vecu_FSQRT32x4:
5058 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
5059 break;
5060 default:
5061 goto bad;
5062 }
5063 goto done;
5064 }
5065 case ARM64in_VNarrowV: {
5066 /* 31 23 21 15 9 4
5067 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5068 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5069 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5070
5071 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5072 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5073 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5074
5075 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5076 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5077 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5078
5079 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5080 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5081 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
5082 */
5083 UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
5084 UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
5085 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5086 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
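         /* ORing (dszBlg2 << 1) into the 8-bit opcode chunk below drops
            dszBlg2 into the sz field (bits 23:22), matching the 00/01/10
            size rows in the comment above. */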
5087 switch (i->ARM64in.VNarrowV.op) {
5088 case ARM64vecna_XTN:
5089 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5090 X00001, X001010, vN, vD);
5091 goto done;
5092 case ARM64vecna_SQXTUN:
5093 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5094 X00001, X001010, vN, vD);
5095 goto done;
5096 case ARM64vecna_SQXTN:
5097 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5098 X00001, X010010, vN, vD);
5099 goto done;
5100 case ARM64vecna_UQXTN:
5101 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5102 X00001, X010010, vN, vD);
5103 goto done;
5104 default:
5105 break;
5106 }
5107 goto bad;
5108 }
5109 case ARM64in_VShiftImmV: {
5110 /*
5111 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5112 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5113
5114 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5115 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5116 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5117
5118 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5119 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5120 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5121
5122 where immh:immb
5123 = case T of
5124 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5125 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5126 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5127 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5128
5129 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5130
5131 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5132 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5133 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5134
5135 where immh:immb
5136 = case T of
5137 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5138 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5139 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5140 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
5141 */
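         /* Worked example (a sketch, not in the original source): for
            USHR Vd.4s, Vn.4s, #3 the immh:immb field is
            X0100000 | (32-3) = 0111101; for SHL Vd.4s, Vn.4s, #3 it is
            X0100000 | 3 = 0100011. */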
5142 UInt vD = qregEnc(i->ARM64in.VShiftImmV.dst);
5143 UInt vN = qregEnc(i->ARM64in.VShiftImmV.src);
5144 UInt sh = i->ARM64in.VShiftImmV.amt;
5145 UInt tmpl = 0; /* invalid */
5146
5147 const UInt tmpl_USHR
5148 = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5149 const UInt tmpl_SSHR
5150 = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5151
5152 const UInt tmpl_UQSHRN
5153 = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5154 const UInt tmpl_SQSHRN
5155 = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5156 const UInt tmpl_SQSHRUN
5157 = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5158
5159 const UInt tmpl_UQRSHRN
5160 = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5161 const UInt tmpl_SQRSHRN
5162 = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5163 const UInt tmpl_SQRSHRUN
5164 = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5165
5166 const UInt tmpl_SHL
5167 = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5168
5169 const UInt tmpl_UQSHL
5170 = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5171 const UInt tmpl_SQSHL
5172 = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5173 const UInt tmpl_SQSHLU
5174 = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5175
5176 switch (i->ARM64in.VShiftImmV.op) {
5177 case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
5178 case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
5179 case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
5180 case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
5181 case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
5182 case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
5183 case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
5184 case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
5185 case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
5186 case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
5187 case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
5188 case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
5189 case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
5190 case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5191 case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
5192 case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
5193 case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
5194 case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
5195 case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
5196 case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
5197 case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
5198 case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
5199 case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
5200 case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
5201 case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
5202 case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5203 case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
5204 case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
5205 case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
5206 case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
5207 case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
5208 case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
5209 case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
5210 case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
5211 case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
5212 case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
5213 case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
5214 case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5215 case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
5216 case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
5217 case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
5218 case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
5219
5220 default: break;
5221
5222 right64x2:
5223 if (sh >= 1 && sh <= 63) {
5224 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5225 goto done;
5226 }
5227 break;
5228 right32x4:
5229 if (sh >= 1 && sh <= 32) {
5230 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5231 goto done;
5232 }
5233 break;
5234 right16x8:
5235 if (sh >= 1 && sh <= 16) {
5236 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5237 goto done;
5238 }
5239 break;
5240 right8x16:
5241 if (sh >= 1 && sh <= 8) {
5242 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5243 goto done;
5244 }
5245 break;
5246
5247 left64x2:
5248 if (sh >= 0 && sh <= 63) {
5249 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5250 goto done;
5251 }
5252 break;
5253 left32x4:
5254 if (sh >= 0 && sh <= 31) {
5255 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5256 goto done;
5257 }
5258 break;
5259 left16x8:
5260 if (sh >= 0 && sh <= 15) {
5261 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5262 goto done;
5263 }
5264 break;
5265 left8x16:
5266 if (sh >= 0 && sh <= 7) {
5267 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5268 goto done;
5269 }
5270 break;
5271 }
5272 goto bad;
5273 }
5274 case ARM64in_VExtV: {
5275 /*
5276 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5277 where imm4 = the shift amount, in bytes,
5278 Vn is low operand, Vm is high operand
5279 */
5280 UInt vD = qregEnc(i->ARM64in.VExtV.dst);
5281 UInt vN = qregEnc(i->ARM64in.VExtV.srcLo);
5282 UInt vM = qregEnc(i->ARM64in.VExtV.srcHi);
5283 UInt imm4 = i->ARM64in.VExtV.amtB;
5284 vassert(imm4 >= 1 && imm4 <= 15);
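         /* Per the layout above, imm4 occupies bits 14..11 with bit 10
            zero, hence the (imm4 << 1) when forming the 6-bit field. */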
5285 *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5286 X000000 | (imm4 << 1), vN, vD);
5287 goto done;
5288 }
5289 case ARM64in_VImmQ: {
5290 UInt rQ = qregEnc(i->ARM64in.VImmQ.rQ);
5291 UShort imm = i->ARM64in.VImmQ.imm;
5292 vassert(rQ < 32);
5293 switch (imm) {
5294 case 0x0000:
5295 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5296 *p++ = 0x4F000400 | rQ;
5297 goto done;
5298 case 0x0001:
5299 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5300 *p++ = 0x2F00E420 | rQ;
5301 goto done;
5302 case 0x0003:
5303 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5304 *p++ = 0x2F00E460 | rQ;
5305 goto done;
5306 case 0x000F:
5307 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5308 *p++ = 0x2F00E5E0 | rQ;
5309 goto done;
5310 case 0x003F:
5311 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5312 *p++ = 0x2F01E7E0 | rQ;
5313 goto done;
5314 case 0x00FF:
5315 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5316 *p++ = 0x2F07E7E0 | rQ;
5317 goto done;
5318 case 0xFFFF:
5319 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5320 *p++ = 0x6F000400 | rQ;
5321 goto done;
5322 default:
5323 break;
5324 }
5325 goto bad; /* no other handled cases right now */
5326 }
5327
5328 case ARM64in_VDfromX: {
5329 /* INS Vd.D[0], rX
5330 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5331 This isn't wonderful, in the sense that the upper half of
5332 the vector register stays unchanged and thus the insn is
5333 data dependent on its output register. */
5334 UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5335 UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5336 vassert(xx < 31);
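         /* Assuming X_2_6_2_12_5_5 packs fields of 2,6,2,12,5,5 bits (as
            its name suggests), passing zeroes for the leading fields just
            places xx and dd in the Rn (bits 9:5) and Rd (bits 4:0) slots
            of the fixed opcode word. */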
5337 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5338 goto done;
5339 }
5340
5341 case ARM64in_VQfromX: {
5342 /* FMOV D, X
5343 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5344 I think this zeroes out the top half of the destination, which
5345 is what we need. TODO: can we do VDfromX and VQfromXX better? */
5346 UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5347 UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5348 vassert(xx < 31);
5349 *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5350 goto done;
5351 }
5352
5353 case ARM64in_VQfromXX: {
5354 /* What we really generate is a two insn sequence:
5355 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5356 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5357 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5358 */
5359 UInt qq = qregEnc(i->ARM64in.VQfromXX.rQ);
5360 UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5361 UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5362 vassert(xhi < 31 && xlo < 31);
5363 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5364 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5365 goto done;
5366 }
5367
5368 case ARM64in_VXfromQ: {
5369 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
5370 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
5371 */
5372 UInt dd = iregEnc(i->ARM64in.VXfromQ.rX);
5373 UInt nn = qregEnc(i->ARM64in.VXfromQ.rQ);
5374 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5375 vassert(dd < 31);
5376 vassert(laneNo < 2);
5377 *p++ = X_3_8_5_6_5_5(X010, X01110000,
5378 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5379 goto done;
5380 }
5381
5382 case ARM64in_VXfromDorS: {
5383 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5384 100 11110011 00110 000000 n d FMOV Xd, Dn
5385 */
5386 UInt dd = iregEnc(i->ARM64in.VXfromDorS.rX);
5387 UInt nn = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5388 Bool fromD = i->ARM64in.VXfromDorS.fromD;
5389 vassert(dd < 31);
5390 *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
5391 fromD ? X11110011 : X11110001,
5392 X00110, X000000, nn, dd);
5393 goto done;
5394 }
5395
5396 case ARM64in_VMov: {
5397 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5398 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5399 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
5400 */
5401 HReg rD = i->ARM64in.VMov.dst;
5402 HReg rN = i->ARM64in.VMov.src;
5403 switch (i->ARM64in.VMov.szB) {
5404 case 16: {
5405 UInt dd = qregEnc(rD);
5406 UInt nn = qregEnc(rN);
5407 *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
5408 goto done;
5409 }
5410 case 8: {
5411 UInt dd = dregEnc(rD);
5412 UInt nn = dregEnc(rN);
5413 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
5414 goto done;
5415 }
5416 default:
5417 break;
5418 }
5419 goto bad;
5420 }
5421
5422 case ARM64in_EvCheck: {
5423 /* The sequence is fixed (canned) except for the two amodes
5424 supplied by the insn. These don't change the length, though.
5425 We generate:
5426 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5427 subs w9, w9, #1
5428 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5429 bpl nofail
5430 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5431 br x9
5432 nofail:
5433 */
5434 UInt* p0 = p;
5435 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5436 i->ARM64in.EvCheck.amCounter);
5437 *p++ = 0x71000529; /* subs w9, w9, #1 */
5438 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5439 i->ARM64in.EvCheck.amCounter);
5440 *p++ = 0x54000065; /* bpl nofail */
5441 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5442 i->ARM64in.EvCheck.amFailAddr);
5443 *p++ = 0xD61F0120; /* br x9 */
5444 /* nofail: */
5445
5446 /* Crosscheck */
5447 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
5448 goto done;
5449 }
5450
5451 case ARM64in_ProfInc: {
5452 /* We generate:
5453 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5454 expectation that a later call to LibVEX_patchProfCtr
5455 will be used to fill in the immediate fields once the
5456 right value is known.)
5457 imm64-exactly4 x9, 0x6555'7555'8555'9566
5458 ldr x8, [x9]
5459 add x8, x8, #1
5460 str x8, [x9]
5461 */
5462 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5463 *p++ = 0xF9400128;   /* ldr x8, [x9] */
5464 *p++ = 0x91000508;   /* add x8, x8, #1 */
5465 *p++ = 0xF9000128;   /* str x8, [x9] */
5466 /* Tell the caller .. */
5467 vassert(!(*is_profInc));
5468 *is_profInc = True;
5469 goto done;
5470 }
5471
5472 /* ... */
5473 default:
5474 goto bad;
5475 }
5476
5477 bad:
5478 ppARM64Instr(i);
5479 vpanic("emit_ARM64Instr");
5480 /*NOTREACHED*/
5481
5482 done:
5483 vassert(((UChar*)p) - &buf[0] <= 36);
5484 return ((UChar*)p) - &buf[0];
5485 }
5486
5487
5488 /* How big is an event check? See case for ARM64in_EvCheck in
5489 emit_ARM64Instr just above. That crosschecks what this returns, so
5490 we can tell if we're inconsistent. */
5491 Int evCheckSzB_ARM64 (void)
5492 {
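   /* 6 insns of 4 bytes each, assuming each amode access in the
      ARM64in_EvCheck case encodes as a single insn; the vassert there
      crosschecks this against what was actually emitted. */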
5493 return 24;
5494 }
5495
5496
5497 /* NB: what goes on here has to be very closely coordinated with the
5498 emitInstr case for XDirect, above. */
5499 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
5500 void* place_to_chain,
5501 const void* disp_cp_chain_me_EXPECTED,
5502 const void* place_to_jump_to )
5503 {
5504 vassert(endness_host == VexEndnessLE);
5505
5506 /* What we're expecting to see is:
5507 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5508 movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
5509 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5510 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5511 blr x9
5512 viz
5513 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5514 D6 3F 01 20
5515 */
5516 UInt* p = (UInt*)place_to_chain;
5517 vassert(0 == (3 & (HWord)p));
5518 vassert(is_imm64_to_ireg_EXACTLY4(
5519 p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
5520 vassert(p[4] == 0xD63F0120);
5521
5522 /* And what we want to change it to is:
5523 movw x9, place_to_jump_to[15:0]
5524 movk x9, place_to_jump_to[31:16], lsl 16
5525 movk x9, place_to_jump_to[47:32], lsl 32
5526 movk x9, place_to_jump_to[63:48], lsl 48
5527 br x9
5528 viz
5529 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5530 D6 1F 01 20
5531
5532 The replacement has the same length as the original.
5533 */
5534 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
5535 p[4] = 0xD61F0120;
5536
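   /* Five 32-bit words were rewritten: the four emitted by
      imm64_to_ireg_EXACTLY4 plus the BR, hence a 20 byte range. */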
5537 VexInvalRange vir = {(HWord)p, 20};
5538 return vir;
5539 }
5540
5541
5542 /* NB: what goes on here has to be very closely coordinated with the
5543 emitInstr case for XDirect, above. */
5544 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
5545 void* place_to_unchain,
5546 const void* place_to_jump_to_EXPECTED,
5547 const void* disp_cp_chain_me )
5548 {
5549 vassert(endness_host == VexEndnessLE);
5550
5551 /* What we're expecting to see is:
5552 movw x9, place_to_jump_to_EXPECTED[15:0]
5553 movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
5554 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5555 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5556 br x9
5557 viz
5558 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5559 D6 1F 01 20
5560 */
5561 UInt* p = (UInt*)place_to_unchain;
5562 vassert(0 == (3 & (HWord)p));
5563 vassert(is_imm64_to_ireg_EXACTLY4(
5564 p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
5565 vassert(p[4] == 0xD61F0120);
5566
5567 /* And what we want to change it to is:
5568 movw x9, disp_cp_chain_me_to[15:0]
5569 movk x9, disp_cp_chain_me_to[31:16], lsl 16
5570 movk x9, disp_cp_chain_me_to[47:32], lsl 32
5571 movk x9, disp_cp_chain_me_to[63:48], lsl 48
5572 blr x9
5573 viz
5574 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5575 D6 3F 01 20
5576 */
5577 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
5578 p[4] = 0xD63F0120;
5579
5580 VexInvalRange vir = {(HWord)p, 20};
5581 return vir;
5582 }
5583
5584
5585 /* Patch the counter address into a profile inc point, as previously
5586 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
5587 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
5588 void* place_to_patch,
5589 const ULong* location_of_counter )
5590 {
5591 vassert(sizeof(ULong*) == 8);
5592 vassert(endness_host == VexEndnessLE);
5593 UInt* p = (UInt*)place_to_patch;
5594 vassert(0 == (3 & (HWord)p));
5595 vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
5596 vassert(p[4] == 0xF9400128);
5597 vassert(p[5] == 0x91000508);
5598 vassert(p[6] == 0xF9000128);
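   /* Only the four insns originally written by imm64_to_ireg_EXACTLY4
      change, hence the 16 byte (4*4) invalidation range below. */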
5599 imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
5600 VexInvalRange vir = {(HWord)p, 4*4};
5601 return vir;
5602 }
5603
5604 /*---------------------------------------------------------------*/
5605 /*--- end host_arm64_defs.c ---*/
5606 /*---------------------------------------------------------------*/
5607