1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_basictypes.h"
32 #include "libvex.h"
33 #include "libvex_trc_values.h"
34
35 #include "main_util.h"
36 #include "host_generic_regs.h"
37 #include "host_arm64_defs.h"
38
39 //ZZ UInt arm_hwcaps = 0;
40
41
42 /* --------- Registers. --------- */
43
44 /* The usual HReg abstraction. We use the following classes only:
45 X regs (64 bit int)
46 D regs (64 bit float, also used for 32 bit float)
47 Q regs (128 bit vector)
48 */
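/* Note (illustrative): real registers are created below with
   mkHReg(n, class, False) and are printed as "x<n>", "d<n>" or "q<n>",
   whereas virtual registers (the True case) are handed to the generic
   ppHReg for printing. */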
49
50 void ppHRegARM64 ( HReg reg ) {
51 Int r;
52 /* Be generic for all virtual regs. */
53 if (hregIsVirtual(reg)) {
54 ppHReg(reg);
55 return;
56 }
57 /* But specific for real regs. */
58 switch (hregClass(reg)) {
59 case HRcInt64:
60 r = hregNumber(reg);
61 vassert(r >= 0 && r < 31);
62 vex_printf("x%d", r);
63 return;
64 case HRcFlt64:
65 r = hregNumber(reg);
66 vassert(r >= 0 && r < 32);
67 vex_printf("d%d", r);
68 return;
69 case HRcVec128:
70 r = hregNumber(reg);
71 vassert(r >= 0 && r < 32);
72 vex_printf("q%d", r);
73 return;
74 default:
75 vpanic("ppHRegARM64");
76 }
77 }
78
79 static void ppHRegARM64asSreg ( HReg reg ) {
80 ppHRegARM64(reg);
81 vex_printf("(S-reg)");
82 }
83
84 HReg hregARM64_X0 ( void ) { return mkHReg(0, HRcInt64, False); }
85 HReg hregARM64_X1 ( void ) { return mkHReg(1, HRcInt64, False); }
86 HReg hregARM64_X2 ( void ) { return mkHReg(2, HRcInt64, False); }
87 HReg hregARM64_X3 ( void ) { return mkHReg(3, HRcInt64, False); }
88 HReg hregARM64_X4 ( void ) { return mkHReg(4, HRcInt64, False); }
89 HReg hregARM64_X5 ( void ) { return mkHReg(5, HRcInt64, False); }
90 HReg hregARM64_X6 ( void ) { return mkHReg(6, HRcInt64, False); }
91 HReg hregARM64_X7 ( void ) { return mkHReg(7, HRcInt64, False); }
92 //ZZ HReg hregARM_R8 ( void ) { return mkHReg(8, HRcInt32, False); }
93 HReg hregARM64_X9 ( void ) { return mkHReg(9, HRcInt64, False); }
94 HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); }
95 HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); }
96 HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); }
97 HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); }
98 HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); }
99 HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); }
100 HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); }
101 HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); }
102 HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); }
103 HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); }
104 HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); }
105 HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); }
106 HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); }
107 HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); }
108
109 // Should really use D8 .. D15 for class F64, since they are callee
110 // save
111 HReg hregARM64_D8 ( void ) { return mkHReg(8, HRcFlt64, False); }
112 HReg hregARM64_D9 ( void ) { return mkHReg(9, HRcFlt64, False); }
113 HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
114 HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
115 HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
116 HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); }
117 //ZZ HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
118 //ZZ HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
119 //ZZ HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
120 //ZZ HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
121 //ZZ HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
122 HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); }
123 HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); }
124 HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); }
125 //ZZ HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
126 //ZZ HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
127 //ZZ HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
128 //ZZ HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
129 //ZZ HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
130
131 void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr )
132 {
133 Int i = 0;
134 *nregs = 24;
135 *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
136
137 // callee-saved ones (22 to 28) are listed first, since we prefer
138 // them if they're available
139 (*arr)[i++] = hregARM64_X22();
140 (*arr)[i++] = hregARM64_X23();
141 (*arr)[i++] = hregARM64_X24();
142 (*arr)[i++] = hregARM64_X25();
143 (*arr)[i++] = hregARM64_X26();
144 (*arr)[i++] = hregARM64_X27();
145 (*arr)[i++] = hregARM64_X28();
146
147 (*arr)[i++] = hregARM64_X0();
148 (*arr)[i++] = hregARM64_X1();
149 (*arr)[i++] = hregARM64_X2();
150 (*arr)[i++] = hregARM64_X3();
151 (*arr)[i++] = hregARM64_X4();
152 (*arr)[i++] = hregARM64_X5();
153 (*arr)[i++] = hregARM64_X6();
154 (*arr)[i++] = hregARM64_X7();
155 // X8 .. who knows.
156 // X9 is a chaining/spill temporary, not available to regalloc.
157
158 // Do we really need all these?
159 //(*arr)[i++] = hregARM64_X10();
160 //(*arr)[i++] = hregARM64_X11();
161 //(*arr)[i++] = hregARM64_X12();
162 //(*arr)[i++] = hregARM64_X13();
163 //(*arr)[i++] = hregARM64_X14();
164 //(*arr)[i++] = hregARM64_X15();
165 // X21 is the guest state pointer, not available to regalloc.
166
167 // vector regs. Unfortunately not callee-saved.
168 (*arr)[i++] = hregARM64_Q16();
169 (*arr)[i++] = hregARM64_Q17();
170 (*arr)[i++] = hregARM64_Q18();
171
172 // F64 regs, all of which are callee-saved
173 (*arr)[i++] = hregARM64_D8();
174 (*arr)[i++] = hregARM64_D9();
175 (*arr)[i++] = hregARM64_D10();
176 (*arr)[i++] = hregARM64_D11();
177 (*arr)[i++] = hregARM64_D12();
178 (*arr)[i++] = hregARM64_D13();
179
180 // unavail: x21 as GSP
181 // x9 is used as a spill/reload/chaining/call temporary
182 // x8 is unassigned
183 // x30 as LR
184 // x31 because dealing with the SP-vs-ZR overloading is too
185 // confusing, and we don't need to do so, so let's just avoid
186 // the problem
187 //
188 // Currently, we have 15 allocatable integer registers:
189 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
190 //
191 // Hence for the allocatable integer registers we have:
192 //
193 // callee-saved: 22 23 24 25 26 27 28
194 // caller-saved: 0 1 2 3 4 5 6 7
195 //
196 // If the set of available registers changes or if the e/r status
197 // changes, be sure to re-check/sync the definition of
198 // getHRegUsage for ARMInstr_Call too.
199 vassert(i == *nregs);
200 }
201
202
203 /* --------- Condition codes, ARM64 encoding. --------- */
204
205 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
206 switch (cond) {
207 case ARM64cc_EQ: return "eq";
208 case ARM64cc_NE: return "ne";
209 case ARM64cc_CS: return "cs";
210 case ARM64cc_CC: return "cc";
211 case ARM64cc_MI: return "mi";
212 case ARM64cc_PL: return "pl";
213 case ARM64cc_VS: return "vs";
214 case ARM64cc_VC: return "vc";
215 case ARM64cc_HI: return "hi";
216 case ARM64cc_LS: return "ls";
217 case ARM64cc_GE: return "ge";
218 case ARM64cc_LT: return "lt";
219 case ARM64cc_GT: return "gt";
220 case ARM64cc_LE: return "le";
221 case ARM64cc_AL: return "al"; // default
222 case ARM64cc_NV: return "nv";
223 default: vpanic("showARM64CondCode");
224 }
225 }
226
227
228 /* --------- Memory address expressions (amodes). --------- */
229
230 ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
231 ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
232 am->tag = ARM64am_RI9;
233 am->ARM64am.RI9.reg = reg;
234 am->ARM64am.RI9.simm9 = simm9;
235 vassert(-256 <= simm9 && simm9 <= 255);
236 return am;
237 }
238
239 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
240 ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
241 am->tag = ARM64am_RI12;
242 am->ARM64am.RI12.reg = reg;
243 am->ARM64am.RI12.uimm12 = uimm12;
244 am->ARM64am.RI12.szB = szB;
245 vassert(uimm12 >= 0 && uimm12 <= 4095);
246 switch (szB) {
247 case 1: case 2: case 4: case 8: break;
248 default: vassert(0);
249 }
250 return am;
251 }
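/* Example (illustrative only): a 64-bit access at byte offset 48 from
   the guest state pointer (X21) could be described as
   ARM64AMode_RI12(hregARM64_X21(), 6, 8), since uimm12 holds the
   offset scaled by szB (6 * 8 == 48); ppARM64AMode below prints
   szB * uimm12 accordingly. */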
252
253 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
254 ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
255 am->tag = ARM64am_RR;
256 am->ARM64am.RR.base = base;
257 am->ARM64am.RR.index = index;
258 return am;
259 }
260
261 static void ppARM64AMode ( ARM64AMode* am ) {
262 switch (am->tag) {
263 case ARM64am_RI9:
264 vex_printf("%d(", am->ARM64am.RI9.simm9);
265 ppHRegARM64(am->ARM64am.RI9.reg);
266 vex_printf(")");
267 break;
268 case ARM64am_RI12:
269 vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
270 * (UInt)am->ARM64am.RI12.uimm12);
271 ppHRegARM64(am->ARM64am.RI12.reg);
272 vex_printf(")");
273 break;
274 case ARM64am_RR:
275 vex_printf("(");
276 ppHRegARM64(am->ARM64am.RR.base);
277 vex_printf(",");
278 ppHRegARM64(am->ARM64am.RR.index);
279 vex_printf(")");
280 break;
281 default:
282 vassert(0);
283 }
284 }
285
286 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
287 switch (am->tag) {
288 case ARM64am_RI9:
289 addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
290 return;
291 case ARM64am_RI12:
292 addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
293 return;
294 case ARM64am_RR:
295 addHRegUse(u, HRmRead, am->ARM64am.RR.base);
296 addHRegUse(u, HRmRead, am->ARM64am.RR.index);
297 return;
298 default:
299 vpanic("addRegUsage_ARM64Amode");
300 }
301 }
302
303 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
304 switch (am->tag) {
305 case ARM64am_RI9:
306 am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
307 return;
308 case ARM64am_RI12:
309 am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
310 return;
311 case ARM64am_RR:
312 am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
313 am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
314 return;
315 default:
316 vpanic("mapRegs_ARM64Amode");
317 }
318 }
319
320
321 //ZZ /* --------- Mem AModes: Addressing Mode 2 --------- */
322 //ZZ
323 //ZZ ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
324 //ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
325 //ZZ am->tag = ARMam2_RI;
326 //ZZ am->ARMam2.RI.reg = reg;
327 //ZZ am->ARMam2.RI.simm9 = simm9;
328 //ZZ vassert(-255 <= simm9 && simm9 <= 255);
329 //ZZ return am;
330 //ZZ }
331 //ZZ ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
332 //ZZ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
333 //ZZ am->tag = ARMam2_RR;
334 //ZZ am->ARMam2.RR.base = base;
335 //ZZ am->ARMam2.RR.index = index;
336 //ZZ return am;
337 //ZZ }
338 //ZZ
339 //ZZ void ppARMAMode2 ( ARMAMode2* am ) {
340 //ZZ switch (am->tag) {
341 //ZZ case ARMam2_RI:
342 //ZZ vex_printf("%d(", am->ARMam2.RI.simm9);
343 //ZZ ppHRegARM(am->ARMam2.RI.reg);
344 //ZZ vex_printf(")");
345 //ZZ break;
346 //ZZ case ARMam2_RR:
347 //ZZ vex_printf("(");
348 //ZZ ppHRegARM(am->ARMam2.RR.base);
349 //ZZ vex_printf(",");
350 //ZZ ppHRegARM(am->ARMam2.RR.index);
351 //ZZ vex_printf(")");
352 //ZZ break;
353 //ZZ default:
354 //ZZ vassert(0);
355 //ZZ }
356 //ZZ }
357 //ZZ
358 //ZZ static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
359 //ZZ switch (am->tag) {
360 //ZZ case ARMam2_RI:
361 //ZZ addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
362 //ZZ return;
363 //ZZ case ARMam2_RR:
364 //ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
365 //ZZ // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
366 //ZZ // return;
367 //ZZ default:
368 //ZZ vpanic("addRegUsage_ARMAmode2");
369 //ZZ }
370 //ZZ }
371 //ZZ
372 //ZZ static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
373 //ZZ switch (am->tag) {
374 //ZZ case ARMam2_RI:
375 //ZZ am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
376 //ZZ return;
377 //ZZ case ARMam2_RR:
378 //ZZ //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
379 //ZZ //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
380 //ZZ //return;
381 //ZZ default:
382 //ZZ vpanic("mapRegs_ARMAmode2");
383 //ZZ }
384 //ZZ }
385 //ZZ
386 //ZZ
387 //ZZ /* --------- Mem AModes: Addressing Mode VFP --------- */
388 //ZZ
389 //ZZ ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
390 //ZZ ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
391 //ZZ vassert(simm11 >= -1020 && simm11 <= 1020);
392 //ZZ vassert(0 == (simm11 & 3));
393 //ZZ am->reg = reg;
394 //ZZ am->simm11 = simm11;
395 //ZZ return am;
396 //ZZ }
397 //ZZ
398 //ZZ void ppARMAModeV ( ARMAModeV* am ) {
399 //ZZ vex_printf("%d(", am->simm11);
400 //ZZ ppHRegARM(am->reg);
401 //ZZ vex_printf(")");
402 //ZZ }
403 //ZZ
404 //ZZ static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
405 //ZZ addHRegUse(u, HRmRead, am->reg);
406 //ZZ }
407 //ZZ
408 //ZZ static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
409 //ZZ am->reg = lookupHRegRemap(m, am->reg);
410 //ZZ }
411 //ZZ
412 //ZZ
413 //ZZ /* --------- Mem AModes: Addressing Mode Neon ------- */
414 //ZZ
415 //ZZ ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
416 //ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
417 //ZZ am->tag = ARMamN_RR;
418 //ZZ am->ARMamN.RR.rN = rN;
419 //ZZ am->ARMamN.RR.rM = rM;
420 //ZZ return am;
421 //ZZ }
422 //ZZ
423 //ZZ ARMAModeN *mkARMAModeN_R ( HReg rN ) {
424 //ZZ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
425 //ZZ am->tag = ARMamN_R;
426 //ZZ am->ARMamN.R.rN = rN;
427 //ZZ return am;
428 //ZZ }
429 //ZZ
430 //ZZ static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
431 //ZZ if (am->tag == ARMamN_R) {
432 //ZZ addHRegUse(u, HRmRead, am->ARMamN.R.rN);
433 //ZZ } else {
434 //ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
435 //ZZ addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
436 //ZZ }
437 //ZZ }
438 //ZZ
439 //ZZ static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
440 //ZZ if (am->tag == ARMamN_R) {
441 //ZZ am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
442 //ZZ } else {
443 //ZZ am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
444 //ZZ am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
445 //ZZ }
446 //ZZ }
447 //ZZ
448 //ZZ void ppARMAModeN ( ARMAModeN* am ) {
449 //ZZ vex_printf("[");
450 //ZZ if (am->tag == ARMamN_R) {
451 //ZZ ppHRegARM(am->ARMamN.R.rN);
452 //ZZ } else {
453 //ZZ ppHRegARM(am->ARMamN.RR.rN);
454 //ZZ }
455 //ZZ vex_printf("]");
456 //ZZ if (am->tag == ARMamN_RR) {
457 //ZZ vex_printf(", ");
458 //ZZ ppHRegARM(am->ARMamN.RR.rM);
459 //ZZ }
460 //ZZ }
461
462
463 /* --------- Reg or uimm12<<{0,12} operands --------- */
464
465 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
466 ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
467 riA->tag = ARM64riA_I12;
468 riA->ARM64riA.I12.imm12 = imm12;
469 riA->ARM64riA.I12.shift = shift;
470 vassert(imm12 < 4096);
471 vassert(shift == 0 || shift == 12);
472 return riA;
473 }
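/* For example, #4095 is representable as (imm12=4095, shift=0) and
   #4096 as (imm12=1, shift=12); a constant that needs both a low and
   a high part cannot be expressed by a single ARM64RIA_I12. */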
474 ARM64RIA* ARM64RIA_R ( HReg reg ) {
475 ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
476 riA->tag = ARM64riA_R;
477 riA->ARM64riA.R.reg = reg;
478 return riA;
479 }
480
481 static void ppARM64RIA ( ARM64RIA* riA ) {
482 switch (riA->tag) {
483 case ARM64riA_I12:
484 vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
485 << riA->ARM64riA.I12.shift));
486 break;
487 case ARM64riA_R:
488 ppHRegARM64(riA->ARM64riA.R.reg);
489 break;
490 default:
491 vassert(0);
492 }
493 }
494
495 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
496 switch (riA->tag) {
497 case ARM64riA_I12:
498 return;
499 case ARM64riA_R:
500 addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
501 return;
502 default:
503 vpanic("addRegUsage_ARM64RIA");
504 }
505 }
506
507 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
508 switch (riA->tag) {
509 case ARM64riA_I12:
510 return;
511 case ARM64riA_R:
512 riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
513 return;
514 default:
515 vpanic("mapRegs_ARM64RIA");
516 }
517 }
518
519
520 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
521
522 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
523 ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
524 riL->tag = ARM64riL_I13;
525 riL->ARM64riL.I13.bitN = bitN;
526 riL->ARM64riL.I13.immR = immR;
527 riL->ARM64riL.I13.immS = immS;
528 vassert(bitN < 2);
529 vassert(immR < 64);
530 vassert(immS < 64);
531 return riL;
532 }
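/* Illustrative example, using the standard AArch64 logical-immediate
   scheme (not re-derived here): the 64-bit constant 0xFF corresponds
   to bitN=1, immR=0, immS=7, which ppARM64RIL below prints as
   #nrs(1,0,7). */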
533 ARM64RIL* ARM64RIL_R ( HReg reg ) {
534 ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
535 riL->tag = ARM64riL_R;
536 riL->ARM64riL.R.reg = reg;
537 return riL;
538 }
539
540 static void ppARM64RIL ( ARM64RIL* riL ) {
541 switch (riL->tag) {
542 case ARM64riL_I13:
543 vex_printf("#nrs(%u,%u,%u)",
544 (UInt)riL->ARM64riL.I13.bitN,
545 (UInt)riL->ARM64riL.I13.immR,
546 (UInt)riL->ARM64riL.I13.immS);
547 break;
548 case ARM64riL_R:
549 ppHRegARM64(riL->ARM64riL.R.reg);
550 break;
551 default:
552 vassert(0);
553 }
554 }
555
556 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
557 switch (riL->tag) {
558 case ARM64riL_I13:
559 return;
560 case ARM64riL_R:
561 addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
562 return;
563 default:
564 vpanic("addRegUsage_ARM64RIL");
565 }
566 }
567
568 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
569 switch (riL->tag) {
570 case ARM64riL_I13:
571 return;
572 case ARM64riL_R:
573 riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
574 return;
575 default:
576 vpanic("mapRegs_ARM64RIL");
577 }
578 }
579
580
581 /* --------------- Reg or uimm6 operands --------------- */
582
583 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
584 ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
585 ri6->tag = ARM64ri6_I6;
586 ri6->ARM64ri6.I6.imm6 = imm6;
587 vassert(imm6 > 0 && imm6 < 64);
588 return ri6;
589 }
590 ARM64RI6* ARM64RI6_R ( HReg reg ) {
591 ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
592 ri6->tag = ARM64ri6_R;
593 ri6->ARM64ri6.R.reg = reg;
594 return ri6;
595 }
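/* Note that imm6 == 0 is rejected above: only shift amounts 1 .. 63
   can be carried as an immediate, so a shift by zero presumably has to
   be expressed some other way (for instance as a plain register move;
   that is an assumption, not something shown in this file). */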
596
597 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
598 switch (ri6->tag) {
599 case ARM64ri6_I6:
600 vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
601 break;
602 case ARM64ri6_R:
603 ppHRegARM64(ri6->ARM64ri6.R.reg);
604 break;
605 default:
606 vassert(0);
607 }
608 }
609
610 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
611 switch (ri6->tag) {
612 case ARM64ri6_I6:
613 return;
614 case ARM64ri6_R:
615 addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
616 return;
617 default:
618 vpanic("addRegUsage_ARM64RI6");
619 }
620 }
621
622 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
623 switch (ri6->tag) {
624 case ARM64ri6_I6:
625 return;
626 case ARM64ri6_R:
627 ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
628 return;
629 default:
630 vpanic("mapRegs_ARM64RI6");
631 }
632 }
633
634
635 //ZZ /* -------- Neon Immediate operand --------- */
636 //ZZ
637 //ZZ ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
638 //ZZ ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
639 //ZZ i->type = type;
640 //ZZ i->imm8 = imm8;
641 //ZZ return i;
642 //ZZ }
643 //ZZ
644 //ZZ ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
645 //ZZ int i, j;
646 //ZZ ULong y, x = imm->imm8;
647 //ZZ switch (imm->type) {
648 //ZZ case 3:
649 //ZZ x = x << 8; /* fallthrough */
650 //ZZ case 2:
651 //ZZ x = x << 8; /* fallthrough */
652 //ZZ case 1:
653 //ZZ x = x << 8; /* fallthrough */
654 //ZZ case 0:
655 //ZZ return (x << 32) | x;
656 //ZZ case 5:
657 //ZZ case 6:
658 //ZZ if (imm->type == 5)
659 //ZZ x = x << 8;
660 //ZZ else
661 //ZZ x = (x << 8) | x;
662 //ZZ /* fallthrough */
663 //ZZ case 4:
664 //ZZ x = (x << 16) | x;
665 //ZZ return (x << 32) | x;
666 //ZZ case 8:
667 //ZZ x = (x << 8) | 0xFF;
668 //ZZ /* fallthrough */
669 //ZZ case 7:
670 //ZZ x = (x << 8) | 0xFF;
671 //ZZ return (x << 32) | x;
672 //ZZ case 9:
673 //ZZ x = 0;
674 //ZZ for (i = 7; i >= 0; i--) {
675 //ZZ y = ((ULong)imm->imm8 >> i) & 1;
676 //ZZ for (j = 0; j < 8; j++) {
677 //ZZ x = (x << 1) | y;
678 //ZZ }
679 //ZZ }
680 //ZZ return x;
681 //ZZ case 10:
682 //ZZ x |= (x & 0x80) << 5;
683 //ZZ x |= (~x & 0x40) << 5;
684 //ZZ x &= 0x187F; /* 0001 1000 0111 1111 */
685 //ZZ x |= (x & 0x40) << 4;
686 //ZZ x |= (x & 0x40) << 3;
687 //ZZ x |= (x & 0x40) << 2;
688 //ZZ x |= (x & 0x40) << 1;
689 //ZZ x = x << 19;
690 //ZZ x = (x << 32) | x;
691 //ZZ return x;
692 //ZZ default:
693 //ZZ vpanic("ARMNImm_to_Imm64");
694 //ZZ }
695 //ZZ }
696 //ZZ
697 //ZZ ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
698 //ZZ ARMNImm tmp;
699 //ZZ if ((x & 0xFFFFFFFF) == (x >> 32)) {
700 //ZZ if ((x & 0xFFFFFF00) == 0)
701 //ZZ return ARMNImm_TI(0, x & 0xFF);
702 //ZZ if ((x & 0xFFFF00FF) == 0)
703 //ZZ return ARMNImm_TI(1, (x >> 8) & 0xFF);
704 //ZZ if ((x & 0xFF00FFFF) == 0)
705 //ZZ return ARMNImm_TI(2, (x >> 16) & 0xFF);
706 //ZZ if ((x & 0x00FFFFFF) == 0)
707 //ZZ return ARMNImm_TI(3, (x >> 24) & 0xFF);
708 //ZZ if ((x & 0xFFFF00FF) == 0xFF)
709 //ZZ return ARMNImm_TI(7, (x >> 8) & 0xFF);
710 //ZZ if ((x & 0xFF00FFFF) == 0xFFFF)
711 //ZZ return ARMNImm_TI(8, (x >> 16) & 0xFF);
712 //ZZ if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
713 //ZZ if ((x & 0xFF00) == 0)
714 //ZZ return ARMNImm_TI(4, x & 0xFF);
715 //ZZ if ((x & 0x00FF) == 0)
716 //ZZ return ARMNImm_TI(5, (x >> 8) & 0xFF);
717 //ZZ if ((x & 0xFF) == ((x >> 8) & 0xFF))
718 //ZZ return ARMNImm_TI(6, x & 0xFF);
719 //ZZ }
720 //ZZ if ((x & 0x7FFFF) == 0) {
721 //ZZ tmp.type = 10;
722 //ZZ tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
723 //ZZ if (ARMNImm_to_Imm64(&tmp) == x)
724 //ZZ return ARMNImm_TI(tmp.type, tmp.imm8);
725 //ZZ }
726 //ZZ } else {
727 //ZZ /* This can only be type 9. */
728 //ZZ tmp.imm8 = (((x >> 56) & 1) << 7)
729 //ZZ | (((x >> 48) & 1) << 6)
730 //ZZ | (((x >> 40) & 1) << 5)
731 //ZZ | (((x >> 32) & 1) << 4)
732 //ZZ | (((x >> 24) & 1) << 3)
733 //ZZ | (((x >> 16) & 1) << 2)
734 //ZZ | (((x >> 8) & 1) << 1)
735 //ZZ | (((x >> 0) & 1) << 0);
736 //ZZ tmp.type = 9;
737 //ZZ if (ARMNImm_to_Imm64 (&tmp) == x)
738 //ZZ return ARMNImm_TI(tmp.type, tmp.imm8);
739 //ZZ }
740 //ZZ return NULL;
741 //ZZ }
742 //ZZ
743 //ZZ void ppARMNImm (ARMNImm* i) {
744 //ZZ ULong x = ARMNImm_to_Imm64(i);
745 //ZZ vex_printf("0x%llX%llX", x, x);
746 //ZZ }
747 //ZZ
748 //ZZ /* -- Register or scalar operand --- */
749 //ZZ
750 //ZZ ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
751 //ZZ {
752 //ZZ ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
753 //ZZ p->tag = tag;
754 //ZZ p->reg = reg;
755 //ZZ p->index = index;
756 //ZZ return p;
757 //ZZ }
758 //ZZ
759 //ZZ void ppARMNRS(ARMNRS *p)
760 //ZZ {
761 //ZZ ppHRegARM(p->reg);
762 //ZZ if (p->tag == ARMNRS_Scalar) {
763 //ZZ vex_printf("[%d]", p->index);
764 //ZZ }
765 //ZZ }
766
767 /* --------- Instructions. --------- */
768
769 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
770 switch (op) {
771 case ARM64lo_AND: return "and";
772 case ARM64lo_OR: return "orr";
773 case ARM64lo_XOR: return "eor";
774 default: vpanic("showARM64LogicOp");
775 }
776 }
777
778 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
779 switch (op) {
780 case ARM64sh_SHL: return "lsl";
781 case ARM64sh_SHR: return "lsr";
782 case ARM64sh_SAR: return "asr";
783 default: vpanic("showARM64ShiftOp");
784 }
785 }
786
787 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
788 switch (op) {
789 case ARM64un_NEG: return "neg";
790 case ARM64un_NOT: return "not";
791 case ARM64un_CLZ: return "clz";
792 default: vpanic("showARM64UnaryOp");
793 }
794 }
795
796 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
797 switch (op) {
798 case ARM64mul_PLAIN: return "mul ";
799 case ARM64mul_ZX: return "umulh";
800 case ARM64mul_SX: return "smulh";
801 default: vpanic("showARM64MulOp");
802 }
803 }
804
805 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
806 /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
807 ARM64CvtOp op ) {
808 switch (op) {
809 case ARM64cvt_F32_I32S:
810 *syn = 's'; *fszB = 4; *iszB = 4; break;
811 case ARM64cvt_F64_I32S:
812 *syn = 's'; *fszB = 8; *iszB = 4; break;
813 case ARM64cvt_F32_I64S:
814 *syn = 's'; *fszB = 4; *iszB = 8; break;
815 case ARM64cvt_F64_I64S:
816 *syn = 's'; *fszB = 8; *iszB = 8; break;
817 case ARM64cvt_F32_I32U:
818 *syn = 'u'; *fszB = 4; *iszB = 4; break;
819 case ARM64cvt_F64_I32U:
820 *syn = 'u'; *fszB = 8; *iszB = 4; break;
821 case ARM64cvt_F32_I64U:
822 *syn = 'u'; *fszB = 4; *iszB = 8; break;
823 case ARM64cvt_F64_I64U:
824 *syn = 'u'; *fszB = 8; *iszB = 8; break;
825 default:
826 vpanic("characteriseARM64CvtOp");
827 }
828 }
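/* For example, ARM64cvt_F64_I32S is characterised as syn='s', fszB=8,
   iszB=4: a signed conversion involving a 32-bit integer and a 64-bit
   float.  The three values are presumably combined by callers when
   building the printed mnemonic. */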
829
830 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
831 switch (op) {
832 case ARM64fpb_ADD: return "add";
833 case ARM64fpb_SUB: return "sub";
834 case ARM64fpb_MUL: return "mul";
835 case ARM64fpb_DIV: return "div";
836 default: vpanic("showARM64FpBinOp");
837 }
838 }
839
840 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
841 switch (op) {
842 case ARM64fpu_NEG: return "neg ";
843 case ARM64fpu_ABS: return "abs ";
844 case ARM64fpu_SQRT: return "sqrt ";
845 case ARM64fpu_RINT: return "rinti";
846 default: vpanic("showARM64FpUnaryOp");
847 }
848 }
849
850 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
851 /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
852 switch (op) {
853 case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
854 case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
855 case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
856 case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
857 case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
858 case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
859 case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
860 case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
861 case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
862 case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
863 case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
864 case ARM64vecb_FADD64x2: *nm = "fadd"; *ar = "2d"; return;
865 case ARM64vecb_FSUB64x2: *nm = "fsub"; *ar = "2d"; return;
866 case ARM64vecb_FMUL64x2: *nm = "fmul"; *ar = "2d"; return;
867 case ARM64vecb_FDIV64x2: *nm = "fdiv"; *ar = "2d"; return;
868 case ARM64vecb_FADD32x4: *nm = "fadd"; *ar = "4s"; return;
869 case ARM64vecb_FSUB32x4: *nm = "fsub"; *ar = "4s"; return;
870 case ARM64vecb_FMUL32x4: *nm = "fmul"; *ar = "4s"; return;
871 case ARM64vecb_FDIV32x4: *nm = "fdiv"; *ar = "4s"; return;
872 case ARM64vecb_UMAX32x4: *nm = "umax"; *ar = "4s"; return;
873 case ARM64vecb_UMAX16x8: *nm = "umax"; *ar = "8h"; return;
874 case ARM64vecb_UMAX8x16: *nm = "umax"; *ar = "16b"; return;
875 case ARM64vecb_UMIN32x4: *nm = "umin"; *ar = "4s"; return;
876 case ARM64vecb_UMIN16x8: *nm = "umin"; *ar = "8h"; return;
877 case ARM64vecb_UMIN8x16: *nm = "umin"; *ar = "16b"; return;
878 case ARM64vecb_UMULL32x2: *nm = "umull"; *ar = "2d"; return;
879 case ARM64vecb_UMULL16x4: *nm = "umull"; *ar = "4s"; return;
880 case ARM64vecb_UMULL8x8: *nm = "umull"; *ar = "8b"; return;
881 case ARM64vecb_SMAX32x4: *nm = "smax"; *ar = "4s"; return;
882 case ARM64vecb_SMAX16x8: *nm = "smax"; *ar = "8h"; return;
883 case ARM64vecb_SMAX8x16: *nm = "smax"; *ar = "16b"; return;
884 case ARM64vecb_SMIN32x4: *nm = "smin"; *ar = "4s"; return;
885 case ARM64vecb_SMIN16x8: *nm = "smin"; *ar = "8h"; return;
886 case ARM64vecb_SMIN8x16: *nm = "smin"; *ar = "16b"; return;
887 case ARM64vecb_AND: *nm = "and "; *ar = "all"; return;
888 case ARM64vecb_ORR: *nm = "orr "; *ar = "all"; return;
889 case ARM64vecb_XOR: *nm = "eor "; *ar = "all"; return;
890 case ARM64vecb_CMEQ64x2: *nm = "cmeq"; *ar = "2d"; return;
891 case ARM64vecb_CMEQ32x4: *nm = "cmeq"; *ar = "4s"; return;
892 case ARM64vecb_CMEQ16x8: *nm = "cmeq"; *ar = "8h"; return;
893 case ARM64vecb_CMEQ8x16: *nm = "cmeq"; *ar = "16b"; return;
894 case ARM64vecb_CMHI64x2: *nm = "cmhi"; *ar = "2d"; return;
895 case ARM64vecb_CMHI32x4: *nm = "cmhi"; *ar = "4s"; return;
896 case ARM64vecb_CMHI16x8: *nm = "cmhi"; *ar = "8h"; return;
897 case ARM64vecb_CMHI8x16: *nm = "cmhi"; *ar = "16b"; return;
898 case ARM64vecb_CMGT64x2: *nm = "cmgt"; *ar = "2d"; return;
899 case ARM64vecb_CMGT32x4: *nm = "cmgt"; *ar = "4s"; return;
900 case ARM64vecb_CMGT16x8: *nm = "cmgt"; *ar = "8h"; return;
901 case ARM64vecb_CMGT8x16: *nm = "cmgt"; *ar = "16b"; return;
902 case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return;
903 case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return;
904 case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return;
905 case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
906 case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
907 case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
908 case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
909 default: vpanic("showARM64VecBinOp");
910 }
911 }
912
913 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
914 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
915 {
916 switch (op) {
917 case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
918 case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
919 case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
920 case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
921 case ARM64vecu_VMOVL8U: *nm = "vmovl.u8"; *ar = "all"; return;
922 case ARM64vecu_VMOVL16U: *nm = "vmovl.u16"; *ar = "all"; return;
923 case ARM64vecu_VMOVL32U: *nm = "vmovl.u32"; *ar = "all"; return;
924 case ARM64vecu_VMOVL8S: *nm = "vmovl.s8"; *ar = "all"; return;
925 case ARM64vecu_VMOVL16S: *nm = "vmovl.s16"; *ar = "all"; return;
926 case ARM64vecu_VMOVL32S: *nm = "vmovl.s32"; *ar = "all"; return;
927 case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
928 case ARM64vecu_CNT: *nm = "cnt "; *ar = "16b"; return;
929 case ARM64vecu_UADDLV8x16: *nm = "uaddlv "; *ar = "16b"; return;
930 case ARM64vecu_UADDLV16x8: *nm = "uaddlv "; *ar = "8h"; return;
931 case ARM64vecu_UADDLV32x4: *nm = "uaddlv "; *ar = "4s"; return;
932 case ARM64vecu_SADDLV8x16: *nm = "saddlv "; *ar = "16b"; return;
933 case ARM64vecu_SADDLV16x8: *nm = "saddlv "; *ar = "8h"; return;
934 case ARM64vecu_SADDLV32x4: *nm = "saddlv "; *ar = "4s"; return;
935 default: vpanic("showARM64VecUnaryOp");
936 }
937 }
938
939 static void showARM64VecShiftOp(/*OUT*/const HChar** nm,
940 /*OUT*/const HChar** ar,
941 ARM64VecShiftOp op )
942 {
943 switch (op) {
944 case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
945 case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
946 case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
947 case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
948 case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
949 case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
950 case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
951 case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
952 case ARM64vecsh_SHL64x2: *nm = "shl "; *ar = "2d"; return;
953 case ARM64vecsh_SHL32x4: *nm = "shl "; *ar = "4s"; return;
954 case ARM64vecsh_SHL16x8: *nm = "shl "; *ar = "8h"; return;
955 case ARM64vecsh_SHL8x16: *nm = "shl "; *ar = "16b"; return;
956 default: vpanic("showARM64VecShiftImmOp");
957 }
958 }
959
960 //ZZ const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
961 //ZZ switch (op) {
962 //ZZ case ARMneon_VAND: return "vand";
963 //ZZ case ARMneon_VORR: return "vorr";
964 //ZZ case ARMneon_VXOR: return "veor";
965 //ZZ case ARMneon_VADD: return "vadd";
966 //ZZ case ARMneon_VRHADDS: return "vrhadd";
967 //ZZ case ARMneon_VRHADDU: return "vrhadd";
968 //ZZ case ARMneon_VADDFP: return "vadd";
969 //ZZ case ARMneon_VPADDFP: return "vpadd";
970 //ZZ case ARMneon_VABDFP: return "vabd";
971 //ZZ case ARMneon_VSUB: return "vsub";
972 //ZZ case ARMneon_VSUBFP: return "vsub";
973 //ZZ case ARMneon_VMINU: return "vmin";
974 //ZZ case ARMneon_VMINS: return "vmin";
975 //ZZ case ARMneon_VMINF: return "vmin";
976 //ZZ case ARMneon_VMAXU: return "vmax";
977 //ZZ case ARMneon_VMAXS: return "vmax";
978 //ZZ case ARMneon_VMAXF: return "vmax";
979 //ZZ case ARMneon_VQADDU: return "vqadd";
980 //ZZ case ARMneon_VQADDS: return "vqadd";
981 //ZZ case ARMneon_VQSUBU: return "vqsub";
982 //ZZ case ARMneon_VQSUBS: return "vqsub";
983 //ZZ case ARMneon_VCGTU: return "vcgt";
984 //ZZ case ARMneon_VCGTS: return "vcgt";
985 //ZZ case ARMneon_VCGTF: return "vcgt";
986 //ZZ case ARMneon_VCGEF: return "vcgt";
987 //ZZ case ARMneon_VCGEU: return "vcge";
988 //ZZ case ARMneon_VCGES: return "vcge";
989 //ZZ case ARMneon_VCEQ: return "vceq";
990 //ZZ case ARMneon_VCEQF: return "vceq";
991 //ZZ case ARMneon_VPADD: return "vpadd";
992 //ZZ case ARMneon_VPMINU: return "vpmin";
993 //ZZ case ARMneon_VPMINS: return "vpmin";
994 //ZZ case ARMneon_VPMINF: return "vpmin";
995 //ZZ case ARMneon_VPMAXU: return "vpmax";
996 //ZZ case ARMneon_VPMAXS: return "vpmax";
997 //ZZ case ARMneon_VPMAXF: return "vpmax";
998 //ZZ case ARMneon_VEXT: return "vext";
999 //ZZ case ARMneon_VMUL: return "vmuli";
1000 //ZZ case ARMneon_VMULLU: return "vmull";
1001 //ZZ case ARMneon_VMULLS: return "vmull";
1002 //ZZ case ARMneon_VMULP: return "vmul";
1003 //ZZ case ARMneon_VMULFP: return "vmul";
1004 //ZZ case ARMneon_VMULLP: return "vmul";
1005 //ZZ case ARMneon_VQDMULH: return "vqdmulh";
1006 //ZZ case ARMneon_VQRDMULH: return "vqrdmulh";
1007 //ZZ case ARMneon_VQDMULL: return "vqdmull";
1008 //ZZ case ARMneon_VTBL: return "vtbl";
1009 //ZZ case ARMneon_VRECPS: return "vrecps";
1010 //ZZ case ARMneon_VRSQRTS: return "vrecps";
1011 //ZZ /* ... */
1012 //ZZ default: vpanic("showARMNeonBinOp");
1013 //ZZ }
1014 //ZZ }
1015 //ZZ
1016 //ZZ const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
1017 //ZZ switch (op) {
1018 //ZZ case ARMneon_VAND:
1019 //ZZ case ARMneon_VORR:
1020 //ZZ case ARMneon_VXOR:
1021 //ZZ return "";
1022 //ZZ case ARMneon_VADD:
1023 //ZZ case ARMneon_VSUB:
1024 //ZZ case ARMneon_VEXT:
1025 //ZZ case ARMneon_VMUL:
1026 //ZZ case ARMneon_VPADD:
1027 //ZZ case ARMneon_VTBL:
1028 //ZZ case ARMneon_VCEQ:
1029 //ZZ return ".i";
1030 //ZZ case ARMneon_VRHADDU:
1031 //ZZ case ARMneon_VMINU:
1032 //ZZ case ARMneon_VMAXU:
1033 //ZZ case ARMneon_VQADDU:
1034 //ZZ case ARMneon_VQSUBU:
1035 //ZZ case ARMneon_VCGTU:
1036 //ZZ case ARMneon_VCGEU:
1037 //ZZ case ARMneon_VMULLU:
1038 //ZZ case ARMneon_VPMINU:
1039 //ZZ case ARMneon_VPMAXU:
1040 //ZZ return ".u";
1041 //ZZ case ARMneon_VRHADDS:
1042 //ZZ case ARMneon_VMINS:
1043 //ZZ case ARMneon_VMAXS:
1044 //ZZ case ARMneon_VQADDS:
1045 //ZZ case ARMneon_VQSUBS:
1046 //ZZ case ARMneon_VCGTS:
1047 //ZZ case ARMneon_VCGES:
1048 //ZZ case ARMneon_VQDMULL:
1049 //ZZ case ARMneon_VMULLS:
1050 //ZZ case ARMneon_VPMINS:
1051 //ZZ case ARMneon_VPMAXS:
1052 //ZZ case ARMneon_VQDMULH:
1053 //ZZ case ARMneon_VQRDMULH:
1054 //ZZ return ".s";
1055 //ZZ case ARMneon_VMULP:
1056 //ZZ case ARMneon_VMULLP:
1057 //ZZ return ".p";
1058 //ZZ case ARMneon_VADDFP:
1059 //ZZ case ARMneon_VABDFP:
1060 //ZZ case ARMneon_VPADDFP:
1061 //ZZ case ARMneon_VSUBFP:
1062 //ZZ case ARMneon_VMULFP:
1063 //ZZ case ARMneon_VMINF:
1064 //ZZ case ARMneon_VMAXF:
1065 //ZZ case ARMneon_VPMINF:
1066 //ZZ case ARMneon_VPMAXF:
1067 //ZZ case ARMneon_VCGTF:
1068 //ZZ case ARMneon_VCGEF:
1069 //ZZ case ARMneon_VCEQF:
1070 //ZZ case ARMneon_VRECPS:
1071 //ZZ case ARMneon_VRSQRTS:
1072 //ZZ return ".f";
1073 //ZZ /* ... */
1074 //ZZ default: vpanic("showARMNeonBinOpDataType");
1075 //ZZ }
1076 //ZZ }
1077 //ZZ
1078 //ZZ const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
1079 //ZZ switch (op) {
1080 //ZZ case ARMneon_COPY: return "vmov";
1081 //ZZ case ARMneon_COPYLS: return "vmov";
1082 //ZZ case ARMneon_COPYLU: return "vmov";
1083 //ZZ case ARMneon_COPYN: return "vmov";
1084 //ZZ case ARMneon_COPYQNSS: return "vqmovn";
1085 //ZZ case ARMneon_COPYQNUS: return "vqmovun";
1086 //ZZ case ARMneon_COPYQNUU: return "vqmovn";
1087 //ZZ case ARMneon_NOT: return "vmvn";
1088 //ZZ case ARMneon_EQZ: return "vceq";
1089 //ZZ case ARMneon_CNT: return "vcnt";
1090 //ZZ case ARMneon_CLS: return "vcls";
1091 //ZZ case ARMneon_CLZ: return "vclz";
1092 //ZZ case ARMneon_DUP: return "vdup";
1093 //ZZ case ARMneon_PADDLS: return "vpaddl";
1094 //ZZ case ARMneon_PADDLU: return "vpaddl";
1095 //ZZ case ARMneon_VQSHLNSS: return "vqshl";
1096 //ZZ case ARMneon_VQSHLNUU: return "vqshl";
1097 //ZZ case ARMneon_VQSHLNUS: return "vqshlu";
1098 //ZZ case ARMneon_REV16: return "vrev16";
1099 //ZZ case ARMneon_REV32: return "vrev32";
1100 //ZZ case ARMneon_REV64: return "vrev64";
1101 //ZZ case ARMneon_VCVTFtoU: return "vcvt";
1102 //ZZ case ARMneon_VCVTFtoS: return "vcvt";
1103 //ZZ case ARMneon_VCVTUtoF: return "vcvt";
1104 //ZZ case ARMneon_VCVTStoF: return "vcvt";
1105 //ZZ case ARMneon_VCVTFtoFixedU: return "vcvt";
1106 //ZZ case ARMneon_VCVTFtoFixedS: return "vcvt";
1107 //ZZ case ARMneon_VCVTFixedUtoF: return "vcvt";
1108 //ZZ case ARMneon_VCVTFixedStoF: return "vcvt";
1109 //ZZ case ARMneon_VCVTF32toF16: return "vcvt";
1110 //ZZ case ARMneon_VCVTF16toF32: return "vcvt";
1111 //ZZ case ARMneon_VRECIP: return "vrecip";
1112 //ZZ case ARMneon_VRECIPF: return "vrecipf";
1113 //ZZ case ARMneon_VNEGF: return "vneg";
1114 //ZZ case ARMneon_ABS: return "vabs";
1115 //ZZ case ARMneon_VABSFP: return "vabsfp";
1116 //ZZ case ARMneon_VRSQRTEFP: return "vrsqrtefp";
1117 //ZZ case ARMneon_VRSQRTE: return "vrsqrte";
1118 //ZZ /* ... */
1119 //ZZ default: vpanic("showARMNeonUnOp");
1120 //ZZ }
1121 //ZZ }
1122 //ZZ
1123 //ZZ const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
1124 //ZZ switch (op) {
1125 //ZZ case ARMneon_COPY:
1126 //ZZ case ARMneon_NOT:
1127 //ZZ return "";
1128 //ZZ case ARMneon_COPYN:
1129 //ZZ case ARMneon_EQZ:
1130 //ZZ case ARMneon_CNT:
1131 //ZZ case ARMneon_DUP:
1132 //ZZ case ARMneon_REV16:
1133 //ZZ case ARMneon_REV32:
1134 //ZZ case ARMneon_REV64:
1135 //ZZ return ".i";
1136 //ZZ case ARMneon_COPYLU:
1137 //ZZ case ARMneon_PADDLU:
1138 //ZZ case ARMneon_COPYQNUU:
1139 //ZZ case ARMneon_VQSHLNUU:
1140 //ZZ case ARMneon_VRECIP:
1141 //ZZ case ARMneon_VRSQRTE:
1142 //ZZ return ".u";
1143 //ZZ case ARMneon_CLS:
1144 //ZZ case ARMneon_CLZ:
1145 //ZZ case ARMneon_COPYLS:
1146 //ZZ case ARMneon_PADDLS:
1147 //ZZ case ARMneon_COPYQNSS:
1148 //ZZ case ARMneon_COPYQNUS:
1149 //ZZ case ARMneon_VQSHLNSS:
1150 //ZZ case ARMneon_VQSHLNUS:
1151 //ZZ case ARMneon_ABS:
1152 //ZZ return ".s";
1153 //ZZ case ARMneon_VRECIPF:
1154 //ZZ case ARMneon_VNEGF:
1155 //ZZ case ARMneon_VABSFP:
1156 //ZZ case ARMneon_VRSQRTEFP:
1157 //ZZ return ".f";
1158 //ZZ case ARMneon_VCVTFtoU: return ".u32.f32";
1159 //ZZ case ARMneon_VCVTFtoS: return ".s32.f32";
1160 //ZZ case ARMneon_VCVTUtoF: return ".f32.u32";
1161 //ZZ case ARMneon_VCVTStoF: return ".f32.s32";
1162 //ZZ case ARMneon_VCVTF16toF32: return ".f32.f16";
1163 //ZZ case ARMneon_VCVTF32toF16: return ".f16.f32";
1164 //ZZ case ARMneon_VCVTFtoFixedU: return ".u32.f32";
1165 //ZZ case ARMneon_VCVTFtoFixedS: return ".s32.f32";
1166 //ZZ case ARMneon_VCVTFixedUtoF: return ".f32.u32";
1167 //ZZ case ARMneon_VCVTFixedStoF: return ".f32.s32";
1168 //ZZ /* ... */
1169 //ZZ default: vpanic("showARMNeonUnOpDataType");
1170 //ZZ }
1171 //ZZ }
1172 //ZZ
1173 //ZZ const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
1174 //ZZ switch (op) {
1175 //ZZ case ARMneon_SETELEM: return "vmov";
1176 //ZZ case ARMneon_GETELEMU: return "vmov";
1177 //ZZ case ARMneon_GETELEMS: return "vmov";
1178 //ZZ case ARMneon_VDUP: return "vdup";
1179 //ZZ /* ... */
1180 //ZZ default: vpanic("showARMNeonUnarySOp");
1181 //ZZ }
1182 //ZZ }
1183 //ZZ
1184 //ZZ const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
1185 //ZZ switch (op) {
1186 //ZZ case ARMneon_SETELEM:
1187 //ZZ case ARMneon_VDUP:
1188 //ZZ return ".i";
1189 //ZZ case ARMneon_GETELEMS:
1190 //ZZ return ".s";
1191 //ZZ case ARMneon_GETELEMU:
1192 //ZZ return ".u";
1193 //ZZ /* ... */
1194 //ZZ default: vpanic("showARMNeonUnarySOp");
1195 //ZZ }
1196 //ZZ }
1197 //ZZ
1198 //ZZ const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
1199 //ZZ switch (op) {
1200 //ZZ case ARMneon_VSHL: return "vshl";
1201 //ZZ case ARMneon_VSAL: return "vshl";
1202 //ZZ case ARMneon_VQSHL: return "vqshl";
1203 //ZZ case ARMneon_VQSAL: return "vqshl";
1204 //ZZ /* ... */
1205 //ZZ default: vpanic("showARMNeonShiftOp");
1206 //ZZ }
1207 //ZZ }
1208 //ZZ
1209 //ZZ const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
1210 //ZZ switch (op) {
1211 //ZZ case ARMneon_VSHL:
1212 //ZZ case ARMneon_VQSHL:
1213 //ZZ return ".u";
1214 //ZZ case ARMneon_VSAL:
1215 //ZZ case ARMneon_VQSAL:
1216 //ZZ return ".s";
1217 //ZZ /* ... */
1218 //ZZ default: vpanic("showARMNeonShiftOpDataType");
1219 //ZZ }
1220 //ZZ }
1221 //ZZ
1222 //ZZ const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
1223 //ZZ switch (op) {
1224 //ZZ case ARMneon_TRN: return "vtrn";
1225 //ZZ case ARMneon_ZIP: return "vzip";
1226 //ZZ case ARMneon_UZP: return "vuzp";
1227 //ZZ /* ... */
1228 //ZZ default: vpanic("showARMNeonDualOp");
1229 //ZZ }
1230 //ZZ }
1231 //ZZ
1232 //ZZ const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1233 //ZZ switch (op) {
1234 //ZZ case ARMneon_TRN:
1235 //ZZ case ARMneon_ZIP:
1236 //ZZ case ARMneon_UZP:
1237 //ZZ return "i";
1238 //ZZ /* ... */
1239 //ZZ default: vpanic("showARMNeonDualOp");
1240 //ZZ }
1241 //ZZ }
1242 //ZZ
1243 //ZZ static const HChar* showARMNeonDataSize_wrk ( UInt size )
1244 //ZZ {
1245 //ZZ switch (size) {
1246 //ZZ case 0: return "8";
1247 //ZZ case 1: return "16";
1248 //ZZ case 2: return "32";
1249 //ZZ case 3: return "64";
1250 //ZZ default: vpanic("showARMNeonDataSize");
1251 //ZZ }
1252 //ZZ }
1253 //ZZ
1254 //ZZ static const HChar* showARMNeonDataSize ( ARMInstr* i )
1255 //ZZ {
1256 //ZZ switch (i->tag) {
1257 //ZZ case ARMin_NBinary:
1258 //ZZ if (i->ARMin.NBinary.op == ARMneon_VEXT)
1259 //ZZ return "8";
1260 //ZZ if (i->ARMin.NBinary.op == ARMneon_VAND ||
1261 //ZZ i->ARMin.NBinary.op == ARMneon_VORR ||
1262 //ZZ i->ARMin.NBinary.op == ARMneon_VXOR)
1263 //ZZ return "";
1264 //ZZ return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1265 //ZZ case ARMin_NUnary:
1266 //ZZ if (i->ARMin.NUnary.op == ARMneon_COPY ||
1267 //ZZ i->ARMin.NUnary.op == ARMneon_NOT ||
1268 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1269 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1270 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1271 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1272 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1273 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1274 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1275 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1276 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1277 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1278 //ZZ return "";
1279 //ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1280 //ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1281 //ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1282 //ZZ UInt size;
1283 //ZZ size = i->ARMin.NUnary.size;
1284 //ZZ if (size & 0x40)
1285 //ZZ return "64";
1286 //ZZ if (size & 0x20)
1287 //ZZ return "32";
1288 //ZZ if (size & 0x10)
1289 //ZZ return "16";
1290 //ZZ if (size & 0x08)
1291 //ZZ return "8";
1292 //ZZ vpanic("showARMNeonDataSize");
1293 //ZZ }
1294 //ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1295 //ZZ case ARMin_NUnaryS:
1296 //ZZ if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1297 //ZZ int size;
1298 //ZZ size = i->ARMin.NUnaryS.size;
1299 //ZZ if ((size & 1) == 1)
1300 //ZZ return "8";
1301 //ZZ if ((size & 3) == 2)
1302 //ZZ return "16";
1303 //ZZ if ((size & 7) == 4)
1304 //ZZ return "32";
1305 //ZZ vpanic("showARMNeonDataSize");
1306 //ZZ }
1307 //ZZ return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1308 //ZZ case ARMin_NShift:
1309 //ZZ return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1310 //ZZ case ARMin_NDual:
1311 //ZZ return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1312 //ZZ default:
1313 //ZZ vpanic("showARMNeonDataSize");
1314 //ZZ }
1315 //ZZ }
1316
1317 ARM64Instr* ARM64Instr_Arith ( HReg dst,
1318 HReg argL, ARM64RIA* argR, Bool isAdd ) {
1319 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1320 i->tag = ARM64in_Arith;
1321 i->ARM64in.Arith.dst = dst;
1322 i->ARM64in.Arith.argL = argL;
1323 i->ARM64in.Arith.argR = argR;
1324 i->ARM64in.Arith.isAdd = isAdd;
1325 return i;
1326 }
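/* Illustrative use: ARM64Instr_Arith(dst, src, ARM64RIA_I12(1,0), True)
   describes an "add dst, src, #1", and the same call with isAdd ==
   False describes the corresponding subtract. */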
1327 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
1328 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1329 i->tag = ARM64in_Cmp;
1330 i->ARM64in.Cmp.argL = argL;
1331 i->ARM64in.Cmp.argR = argR;
1332 i->ARM64in.Cmp.is64 = is64;
1333 return i;
1334 }
1335 ARM64Instr* ARM64Instr_Logic ( HReg dst,
1336 HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
1337 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1338 i->tag = ARM64in_Logic;
1339 i->ARM64in.Logic.dst = dst;
1340 i->ARM64in.Logic.argL = argL;
1341 i->ARM64in.Logic.argR = argR;
1342 i->ARM64in.Logic.op = op;
1343 return i;
1344 }
1345 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
1346 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1347 i->tag = ARM64in_Test;
1348 i->ARM64in.Test.argL = argL;
1349 i->ARM64in.Test.argR = argR;
1350 return i;
1351 }
1352 ARM64Instr* ARM64Instr_Shift ( HReg dst,
1353 HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
1354 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1355 i->tag = ARM64in_Shift;
1356 i->ARM64in.Shift.dst = dst;
1357 i->ARM64in.Shift.argL = argL;
1358 i->ARM64in.Shift.argR = argR;
1359 i->ARM64in.Shift.op = op;
1360 return i;
1361 }
1362 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
1363 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1364 i->tag = ARM64in_Unary;
1365 i->ARM64in.Unary.dst = dst;
1366 i->ARM64in.Unary.src = src;
1367 i->ARM64in.Unary.op = op;
1368 return i;
1369 }
1370 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
1371 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1372 i->tag = ARM64in_MovI;
1373 i->ARM64in.MovI.dst = dst;
1374 i->ARM64in.MovI.src = src;
1375 vassert(hregClass(src) == HRcInt64);
1376 vassert(hregClass(dst) == HRcInt64);
1377 return i;
1378 }
1379 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
1380 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1381 i->tag = ARM64in_Imm64;
1382 i->ARM64in.Imm64.dst = dst;
1383 i->ARM64in.Imm64.imm64 = imm64;
1384 return i;
1385 }
1386 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
1387 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1388 i->tag = ARM64in_LdSt64;
1389 i->ARM64in.LdSt64.isLoad = isLoad;
1390 i->ARM64in.LdSt64.rD = rD;
1391 i->ARM64in.LdSt64.amode = amode;
1392 return i;
1393 }
1394 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
1395 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1396 i->tag = ARM64in_LdSt32;
1397 i->ARM64in.LdSt32.isLoad = isLoad;
1398 i->ARM64in.LdSt32.rD = rD;
1399 i->ARM64in.LdSt32.amode = amode;
1400 return i;
1401 }
1402 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
1403 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1404 i->tag = ARM64in_LdSt16;
1405 i->ARM64in.LdSt16.isLoad = isLoad;
1406 i->ARM64in.LdSt16.rD = rD;
1407 i->ARM64in.LdSt16.amode = amode;
1408 return i;
1409 }
1410 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
1411 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1412 i->tag = ARM64in_LdSt8;
1413 i->ARM64in.LdSt8.isLoad = isLoad;
1414 i->ARM64in.LdSt8.rD = rD;
1415 i->ARM64in.LdSt8.amode = amode;
1416 return i;
1417 }
1418 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
1419 ARM64CondCode cond, Bool toFastEP ) {
1420 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1421 i->tag = ARM64in_XDirect;
1422 i->ARM64in.XDirect.dstGA = dstGA;
1423 i->ARM64in.XDirect.amPC = amPC;
1424 i->ARM64in.XDirect.cond = cond;
1425 i->ARM64in.XDirect.toFastEP = toFastEP;
1426 return i;
1427 }
1428 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
1429 ARM64CondCode cond ) {
1430 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1431 i->tag = ARM64in_XIndir;
1432 i->ARM64in.XIndir.dstGA = dstGA;
1433 i->ARM64in.XIndir.amPC = amPC;
1434 i->ARM64in.XIndir.cond = cond;
1435 return i;
1436 }
1437 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
1438 ARM64CondCode cond, IRJumpKind jk ) {
1439 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1440 i->tag = ARM64in_XAssisted;
1441 i->ARM64in.XAssisted.dstGA = dstGA;
1442 i->ARM64in.XAssisted.amPC = amPC;
1443 i->ARM64in.XAssisted.cond = cond;
1444 i->ARM64in.XAssisted.jk = jk;
1445 return i;
1446 }
1447 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
1448 ARM64CondCode cond ) {
1449 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1450 i->tag = ARM64in_CSel;
1451 i->ARM64in.CSel.dst = dst;
1452 i->ARM64in.CSel.argL = argL;
1453 i->ARM64in.CSel.argR = argR;
1454 i->ARM64in.CSel.cond = cond;
1455 return i;
1456 }
1457 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, HWord target, Int nArgRegs,
1458 RetLoc rloc ) {
1459 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1460 i->tag = ARM64in_Call;
1461 i->ARM64in.Call.cond = cond;
1462 i->ARM64in.Call.target = target;
1463 i->ARM64in.Call.nArgRegs = nArgRegs;
1464 i->ARM64in.Call.rloc = rloc;
1465 vassert(is_sane_RetLoc(rloc));
1466 return i;
1467 }
1468 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
1469 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1470 i->tag = ARM64in_AddToSP;
1471 i->ARM64in.AddToSP.simm = simm;
1472 vassert(-4096 < simm && simm < 4096);
1473 vassert(0 == (simm & 0xF));
1474 return i;
1475 }
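/* The multiple-of-16 requirement above keeps SP 16-byte aligned, as
   AArch64 stack-alignment rules require for SP-relative accesses. */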
1476 extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
1477 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1478 i->tag = ARM64in_FromSP;
1479 i->ARM64in.FromSP.dst = dst;
1480 return i;
1481 }
1482 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
1483 ARM64MulOp op ) {
1484 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1485 i->tag = ARM64in_Mul;
1486 i->ARM64in.Mul.dst = dst;
1487 i->ARM64in.Mul.argL = argL;
1488 i->ARM64in.Mul.argR = argR;
1489 i->ARM64in.Mul.op = op;
1490 return i;
1491 }
1492 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
1493 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1494 i->tag = ARM64in_LdrEX;
1495 i->ARM64in.LdrEX.szB = szB;
1496 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1497 return i;
1498 }
1499 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1500 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1501 i->tag = ARM64in_StrEX;
1502 i->ARM64in.StrEX.szB = szB;
1503 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1504 return i;
1505 }
1506 ARM64Instr* ARM64Instr_MFence ( void ) {
1507 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1508 i->tag = ARM64in_MFence;
1509 return i;
1510 }
1511 //ZZ ARM64Instr* ARM64Instr_CLREX( void ) {
1512 //ZZ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1513 //ZZ i->tag = ARM64in_CLREX;
1514 //ZZ return i;
1515 //ZZ }
1516 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1517 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1518 i->tag = ARM64in_VLdStS;
1519 i->ARM64in.VLdStS.isLoad = isLoad;
1520 i->ARM64in.VLdStS.sD = sD;
1521 i->ARM64in.VLdStS.rN = rN;
1522 i->ARM64in.VLdStS.uimm12 = uimm12;
1523 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1524 return i;
1525 }
1526 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1527 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1528 i->tag = ARM64in_VLdStD;
1529 i->ARM64in.VLdStD.isLoad = isLoad;
1530 i->ARM64in.VLdStD.dD = dD;
1531 i->ARM64in.VLdStD.rN = rN;
1532 i->ARM64in.VLdStD.uimm12 = uimm12;
1533 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1534 return i;
1535 }
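/* In both VLdStS and VLdStD the uimm12 field is a byte offset: the
   asserts allow 14 and 15 bits respectively, presumably because the
   value is scaled down by the access size (4 or 8) at encoding time.
   (Assumption; the encoder is not part of this extract.) */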
1536 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1537 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1538 i->tag = ARM64in_VLdStQ;
1539 i->ARM64in.VLdStQ.isLoad = isLoad;
1540 i->ARM64in.VLdStQ.rQ = rQ;
1541 i->ARM64in.VLdStQ.rN = rN;
1542 return i;
1543 }
1544 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1545 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1546 i->tag = ARM64in_VCvtI2F;
1547 i->ARM64in.VCvtI2F.how = how;
1548 i->ARM64in.VCvtI2F.rD = rD;
1549 i->ARM64in.VCvtI2F.rS = rS;
1550 return i;
1551 }
1552 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1553 UChar armRM ) {
1554 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1555 i->tag = ARM64in_VCvtF2I;
1556 i->ARM64in.VCvtF2I.how = how;
1557 i->ARM64in.VCvtF2I.rD = rD;
1558 i->ARM64in.VCvtF2I.rS = rS;
1559 i->ARM64in.VCvtF2I.armRM = armRM;
1560 vassert(armRM <= 3);
1561 return i;
1562 }
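/* armRM selects the rounding mode baked into the float-to-int convert:
   0 = to nearest, 1 = towards +infinity, 2 = towards -infinity,
   3 = towards zero, matching the AArch64 FCVTN/FCVTP/FCVTM/FCVTZ
   families (printed later as the n/p/m/z suffix). */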
1563 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1564 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1565 i->tag = ARM64in_VCvtSD;
1566 i->ARM64in.VCvtSD.sToD = sToD;
1567 i->ARM64in.VCvtSD.dst = dst;
1568 i->ARM64in.VCvtSD.src = src;
1569 return i;
1570 }
1571 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1572 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1573 i->tag = ARM64in_VUnaryD;
1574 i->ARM64in.VUnaryD.op = op;
1575 i->ARM64in.VUnaryD.dst = dst;
1576 i->ARM64in.VUnaryD.src = src;
1577 return i;
1578 }
1579 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1580 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1581 i->tag = ARM64in_VUnaryS;
1582 i->ARM64in.VUnaryS.op = op;
1583 i->ARM64in.VUnaryS.dst = dst;
1584 i->ARM64in.VUnaryS.src = src;
1585 return i;
1586 }
1587 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1588 HReg dst, HReg argL, HReg argR ) {
1589 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1590 i->tag = ARM64in_VBinD;
1591 i->ARM64in.VBinD.op = op;
1592 i->ARM64in.VBinD.dst = dst;
1593 i->ARM64in.VBinD.argL = argL;
1594 i->ARM64in.VBinD.argR = argR;
1595 return i;
1596 }
1597 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1598 HReg dst, HReg argL, HReg argR ) {
1599 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1600 i->tag = ARM64in_VBinS;
1601 i->ARM64in.VBinS.op = op;
1602 i->ARM64in.VBinS.dst = dst;
1603 i->ARM64in.VBinS.argL = argL;
1604 i->ARM64in.VBinS.argR = argR;
1605 return i;
1606 }
1607 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1608 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1609 i->tag = ARM64in_VCmpD;
1610 i->ARM64in.VCmpD.argL = argL;
1611 i->ARM64in.VCmpD.argR = argR;
1612 return i;
1613 }
1614 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1615 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1616 i->tag = ARM64in_VCmpS;
1617 i->ARM64in.VCmpS.argL = argL;
1618 i->ARM64in.VCmpS.argR = argR;
1619 return i;
1620 }
1621 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1622 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1623 i->tag = ARM64in_FPCR;
1624 i->ARM64in.FPCR.toFPCR = toFPCR;
1625 i->ARM64in.FPCR.iReg = iReg;
1626 return i;
1627 }
1628 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1629 HReg dst, HReg argL, HReg argR ) {
1630 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1631 i->tag = ARM64in_VBinV;
1632 i->ARM64in.VBinV.op = op;
1633 i->ARM64in.VBinV.dst = dst;
1634 i->ARM64in.VBinV.argL = argL;
1635 i->ARM64in.VBinV.argR = argR;
1636 return i;
1637 }
1638 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1639 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1640 i->tag = ARM64in_VUnaryV;
1641 i->ARM64in.VUnaryV.op = op;
1642 i->ARM64in.VUnaryV.dst = dst;
1643 i->ARM64in.VUnaryV.arg = arg;
1644 return i;
1645 }
1646 ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src ) {
1647 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1648 i->tag = ARM64in_VNarrowV;
1649 i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1650 i->ARM64in.VNarrowV.dst = dst;
1651 i->ARM64in.VNarrowV.src = src;
1652 vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1653 return i;
1654 }
1655 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
1656 HReg dst, HReg src, UInt amt ) {
1657 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1658 i->tag = ARM64in_VShiftImmV;
1659 i->ARM64in.VShiftImmV.op = op;
1660 i->ARM64in.VShiftImmV.dst = dst;
1661 i->ARM64in.VShiftImmV.src = src;
1662 i->ARM64in.VShiftImmV.amt = amt;
1663 UInt maxSh = 0;
1664 switch (op) {
1665 case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
1666 case ARM64vecsh_SHL64x2:
1667 maxSh = 63; break;
1668 case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
1669 case ARM64vecsh_SHL32x4:
1670 maxSh = 31; break;
1671 case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
1672 case ARM64vecsh_SHL16x8:
1673 maxSh = 15; break;
1674 case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
1675 case ARM64vecsh_SHL8x16:
1676 maxSh = 7; break;
1677 default:
1678 vassert(0);
1679 }
1680 vassert(maxSh > 0);
1681 vassert(amt > 0 && amt <= maxSh);
1682 return i;
1683 }
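/* The accepted shift amount depends on the lane width: 1..63 for 64-bit
   lanes, 1..31 for 32-bit, 1..15 for 16-bit and 1..7 for 8-bit lanes.
   A zero shift is rejected by the assertion, so callers presumably fold
   that case into a plain move beforehand.  Illustrative only -- dstQ and
   srcQ stand for whatever Q-class registers the caller has in hand:

      // unsigned shift right of each 32-bit lane by 3
      ARM64Instr* sh = ARM64Instr_VShiftImmV(ARM64vecsh_USHR32x4,
                                             dstQ, srcQ, 3);
*/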
1684 //ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1685 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1686 //ZZ i->tag = ARMin_VAluS;
1687 //ZZ i->ARMin.VAluS.op = op;
1688 //ZZ i->ARMin.VAluS.dst = dst;
1689 //ZZ i->ARMin.VAluS.argL = argL;
1690 //ZZ i->ARMin.VAluS.argR = argR;
1691 //ZZ return i;
1692 //ZZ }
1693 //ZZ ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1694 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1695 //ZZ i->tag = ARMin_VCMovD;
1696 //ZZ i->ARMin.VCMovD.cond = cond;
1697 //ZZ i->ARMin.VCMovD.dst = dst;
1698 //ZZ i->ARMin.VCMovD.src = src;
1699 //ZZ vassert(cond != ARMcc_AL);
1700 //ZZ return i;
1701 //ZZ }
1702 //ZZ ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1703 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1704 //ZZ i->tag = ARMin_VCMovS;
1705 //ZZ i->ARMin.VCMovS.cond = cond;
1706 //ZZ i->ARMin.VCMovS.dst = dst;
1707 //ZZ i->ARMin.VCMovS.src = src;
1708 //ZZ vassert(cond != ARMcc_AL);
1709 //ZZ return i;
1710 //ZZ }
1711 //ZZ ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1712 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1713 //ZZ i->tag = ARMin_VXferD;
1714 //ZZ i->ARMin.VXferD.toD = toD;
1715 //ZZ i->ARMin.VXferD.dD = dD;
1716 //ZZ i->ARMin.VXferD.rHi = rHi;
1717 //ZZ i->ARMin.VXferD.rLo = rLo;
1718 //ZZ return i;
1719 //ZZ }
1720 //ZZ ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1721 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1722 //ZZ i->tag = ARMin_VXferS;
1723 //ZZ i->ARMin.VXferS.toS = toS;
1724 //ZZ i->ARMin.VXferS.fD = fD;
1725 //ZZ i->ARMin.VXferS.rLo = rLo;
1726 //ZZ return i;
1727 //ZZ }
1728 //ZZ ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1729 //ZZ HReg dst, HReg src ) {
1730 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1731 //ZZ i->tag = ARMin_VCvtID;
1732 //ZZ i->ARMin.VCvtID.iToD = iToD;
1733 //ZZ i->ARMin.VCvtID.syned = syned;
1734 //ZZ i->ARMin.VCvtID.dst = dst;
1735 //ZZ i->ARMin.VCvtID.src = src;
1736 //ZZ return i;
1737 //ZZ }
1738 //ZZ ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1739 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1740 //ZZ i->tag = ARMin_NLdStD;
1741 //ZZ i->ARMin.NLdStD.isLoad = isLoad;
1742 //ZZ i->ARMin.NLdStD.dD = dD;
1743 //ZZ i->ARMin.NLdStD.amode = amode;
1744 //ZZ return i;
1745 //ZZ }
1746 //ZZ
1747 //ZZ ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1748 //ZZ UInt size, Bool Q ) {
1749 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1750 //ZZ i->tag = ARMin_NUnary;
1751 //ZZ i->ARMin.NUnary.op = op;
1752 //ZZ i->ARMin.NUnary.src = nQ;
1753 //ZZ i->ARMin.NUnary.dst = dQ;
1754 //ZZ i->ARMin.NUnary.size = size;
1755 //ZZ i->ARMin.NUnary.Q = Q;
1756 //ZZ return i;
1757 //ZZ }
1758 //ZZ
1759 //ZZ ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1760 //ZZ UInt size, Bool Q ) {
1761 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1762 //ZZ i->tag = ARMin_NUnaryS;
1763 //ZZ i->ARMin.NUnaryS.op = op;
1764 //ZZ i->ARMin.NUnaryS.src = src;
1765 //ZZ i->ARMin.NUnaryS.dst = dst;
1766 //ZZ i->ARMin.NUnaryS.size = size;
1767 //ZZ i->ARMin.NUnaryS.Q = Q;
1768 //ZZ return i;
1769 //ZZ }
1770 //ZZ
1771 //ZZ ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1772 //ZZ UInt size, Bool Q ) {
1773 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1774 //ZZ i->tag = ARMin_NDual;
1775 //ZZ i->ARMin.NDual.op = op;
1776 //ZZ i->ARMin.NDual.arg1 = nQ;
1777 //ZZ i->ARMin.NDual.arg2 = mQ;
1778 //ZZ i->ARMin.NDual.size = size;
1779 //ZZ i->ARMin.NDual.Q = Q;
1780 //ZZ return i;
1781 //ZZ }
1782 //ZZ
1783 //ZZ ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1784 //ZZ HReg dst, HReg argL, HReg argR,
1785 //ZZ UInt size, Bool Q ) {
1786 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1787 //ZZ i->tag = ARMin_NBinary;
1788 //ZZ i->ARMin.NBinary.op = op;
1789 //ZZ i->ARMin.NBinary.argL = argL;
1790 //ZZ i->ARMin.NBinary.argR = argR;
1791 //ZZ i->ARMin.NBinary.dst = dst;
1792 //ZZ i->ARMin.NBinary.size = size;
1793 //ZZ i->ARMin.NBinary.Q = Q;
1794 //ZZ return i;
1795 //ZZ }
1796
1797 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1798 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1799 i->tag = ARM64in_VImmQ;
1800 i->ARM64in.VImmQ.rQ = rQ;
1801 i->ARM64in.VImmQ.imm = imm;
1802 return i;
1803 }
1804 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1805 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1806 i->tag = ARM64in_VDfromX;
1807 i->ARM64in.VDfromX.rD = rD;
1808 i->ARM64in.VDfromX.rX = rX;
1809 return i;
1810 }
1811 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1812 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1813 i->tag = ARM64in_VQfromXX;
1814 i->ARM64in.VQfromXX.rQ = rQ;
1815 i->ARM64in.VQfromXX.rXhi = rXhi;
1816 i->ARM64in.VQfromXX.rXlo = rXlo;
1817 return i;
1818 }
1819 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1820 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1821 i->tag = ARM64in_VXfromQ;
1822 i->ARM64in.VXfromQ.rX = rX;
1823 i->ARM64in.VXfromQ.rQ = rQ;
1824 i->ARM64in.VXfromQ.laneNo = laneNo;
1825 vassert(laneNo <= 1);
1826 return i;
1827 }
1828 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1829 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1830 i->tag = ARM64in_VMov;
1831 i->ARM64in.VMov.szB = szB;
1832 i->ARM64in.VMov.dst = dst;
1833 i->ARM64in.VMov.src = src;
1834 switch (szB) {
1835 case 16:
1836 vassert(hregClass(src) == HRcVec128);
1837 vassert(hregClass(dst) == HRcVec128);
1838 break;
1839 case 8:
1840 vassert(hregClass(src) == HRcFlt64);
1841 vassert(hregClass(dst) == HRcFlt64);
1842 break;
1843 default:
1844 vpanic("ARM64Instr_VMov");
1845 }
1846 return i;
1847 }
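/* VMov only covers whole-register copies: 16 bytes between Q registers
   or 8 bytes between D registers, with the register classes checked
   above.  (The printer below also knows a 4-byte 's' form, but this
   constructor does not accept it.) */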
1848
1849 //ZZ ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1850 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1851 //ZZ i->tag = ARMin_NCMovQ;
1852 //ZZ i->ARMin.NCMovQ.cond = cond;
1853 //ZZ i->ARMin.NCMovQ.dst = dst;
1854 //ZZ i->ARMin.NCMovQ.src = src;
1855 //ZZ vassert(cond != ARMcc_AL);
1856 //ZZ return i;
1857 //ZZ }
1858 //ZZ
1859 //ZZ ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1860 //ZZ HReg dst, HReg argL, HReg argR,
1861 //ZZ UInt size, Bool Q ) {
1862 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1863 //ZZ i->tag = ARMin_NShift;
1864 //ZZ i->ARMin.NShift.op = op;
1865 //ZZ i->ARMin.NShift.argL = argL;
1866 //ZZ i->ARMin.NShift.argR = argR;
1867 //ZZ i->ARMin.NShift.dst = dst;
1868 //ZZ i->ARMin.NShift.size = size;
1869 //ZZ i->ARMin.NShift.Q = Q;
1870 //ZZ return i;
1871 //ZZ }
1872 //ZZ
1873 //ZZ ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1874 //ZZ {
1875 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1876 //ZZ i->tag = ARMin_NShl64;
1877 //ZZ i->ARMin.NShl64.dst = dst;
1878 //ZZ i->ARMin.NShl64.src = src;
1879 //ZZ i->ARMin.NShl64.amt = amt;
1880 //ZZ vassert(amt >= 1 && amt <= 63);
1881 //ZZ return i;
1882 //ZZ }
1883 //ZZ
1884 //ZZ /* Helper copy-pasted from isel.c */
1885 //ZZ static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1886 //ZZ {
1887 //ZZ UInt i;
1888 //ZZ for (i = 0; i < 16; i++) {
1889 //ZZ if (0 == (u & 0xFFFFFF00)) {
1890 //ZZ *u8 = u;
1891 //ZZ *u4 = i;
1892 //ZZ return True;
1893 //ZZ }
1894 //ZZ u = ROR32(u, 30);
1895 //ZZ }
1896 //ZZ vassert(i == 16);
1897 //ZZ return False;
1898 //ZZ }
1899 //ZZ
1900 //ZZ ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1901 //ZZ UInt u8, u4;
1902 //ZZ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1903 //ZZ /* Try to generate single ADD if possible */
1904 //ZZ if (fitsIn8x4(&u8, &u4, imm32)) {
1905 //ZZ i->tag = ARMin_Alu;
1906 //ZZ i->ARMin.Alu.op = ARMalu_ADD;
1907 //ZZ i->ARMin.Alu.dst = rD;
1908 //ZZ i->ARMin.Alu.argL = rN;
1909 //ZZ i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1910 //ZZ } else {
1911 //ZZ i->tag = ARMin_Add32;
1912 //ZZ i->ARMin.Add32.rD = rD;
1913 //ZZ i->ARMin.Add32.rN = rN;
1914 //ZZ i->ARMin.Add32.imm32 = imm32;
1915 //ZZ }
1916 //ZZ return i;
1917 //ZZ }
1918
1919 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1920 ARM64AMode* amFailAddr ) {
1921 ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
1922 i->tag = ARM64in_EvCheck;
1923 i->ARM64in.EvCheck.amCounter = amCounter;
1924 i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1925 return i;
1926 }
1927
1928 //ZZ ARMInstr* ARMInstr_ProfInc ( void ) {
1929 //ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1930 //ZZ i->tag = ARMin_ProfInc;
1931 //ZZ return i;
1932 //ZZ }
1933
1934 /* ... */
1935
1936 void ppARM64Instr ( ARM64Instr* i ) {
1937 switch (i->tag) {
1938 case ARM64in_Arith:
1939 vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1940 ppHRegARM64(i->ARM64in.Arith.dst);
1941 vex_printf(", ");
1942 ppHRegARM64(i->ARM64in.Arith.argL);
1943 vex_printf(", ");
1944 ppARM64RIA(i->ARM64in.Arith.argR);
1945 return;
1946 case ARM64in_Cmp:
1947 vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
1948 ppHRegARM64(i->ARM64in.Cmp.argL);
1949 vex_printf(", ");
1950 ppARM64RIA(i->ARM64in.Cmp.argR);
1951 return;
1952 case ARM64in_Logic:
1953 vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
1954 ppHRegARM64(i->ARM64in.Logic.dst);
1955 vex_printf(", ");
1956 ppHRegARM64(i->ARM64in.Logic.argL);
1957 vex_printf(", ");
1958 ppARM64RIL(i->ARM64in.Logic.argR);
1959 return;
1960 case ARM64in_Test:
1961 vex_printf("tst ");
1962 ppHRegARM64(i->ARM64in.Test.argL);
1963 vex_printf(", ");
1964 ppARM64RIL(i->ARM64in.Test.argR);
1965 return;
1966 case ARM64in_Shift:
1967 vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
1968 ppHRegARM64(i->ARM64in.Shift.dst);
1969 vex_printf(", ");
1970 ppHRegARM64(i->ARM64in.Shift.argL);
1971 vex_printf(", ");
1972 ppARM64RI6(i->ARM64in.Shift.argR);
1973 return;
1974 case ARM64in_Unary:
1975 vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
1976 ppHRegARM64(i->ARM64in.Unary.dst);
1977 vex_printf(", ");
1978 ppHRegARM64(i->ARM64in.Unary.src);
1979 return;
1980 case ARM64in_MovI:
1981 vex_printf("mov ");
1982 ppHRegARM64(i->ARM64in.MovI.dst);
1983 vex_printf(", ");
1984 ppHRegARM64(i->ARM64in.MovI.src);
1985 return;
1986 case ARM64in_Imm64:
1987 vex_printf("imm64 ");
1988 ppHRegARM64(i->ARM64in.Imm64.dst);
1989 vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1990 return;
1991 case ARM64in_LdSt64:
1992 if (i->ARM64in.LdSt64.isLoad) {
1993 vex_printf("ldr ");
1994 ppHRegARM64(i->ARM64in.LdSt64.rD);
1995 vex_printf(", ");
1996 ppARM64AMode(i->ARM64in.LdSt64.amode);
1997 } else {
1998 vex_printf("str ");
1999 ppARM64AMode(i->ARM64in.LdSt64.amode);
2000 vex_printf(", ");
2001 ppHRegARM64(i->ARM64in.LdSt64.rD);
2002 }
2003 return;
2004 case ARM64in_LdSt32:
2005 if (i->ARM64in.LdSt32.isLoad) {
2006 vex_printf("ldruw ");
2007 ppHRegARM64(i->ARM64in.LdSt32.rD);
2008 vex_printf(", ");
2009 ppARM64AMode(i->ARM64in.LdSt32.amode);
2010 } else {
2011 vex_printf("strw ");
2012 ppARM64AMode(i->ARM64in.LdSt32.amode);
2013 vex_printf(", ");
2014 ppHRegARM64(i->ARM64in.LdSt32.rD);
2015 }
2016 return;
2017 case ARM64in_LdSt16:
2018 if (i->ARM64in.LdSt16.isLoad) {
2019 vex_printf("ldruh ");
2020 ppHRegARM64(i->ARM64in.LdSt16.rD);
2021 vex_printf(", ");
2022 ppARM64AMode(i->ARM64in.LdSt16.amode);
2023 } else {
2024 vex_printf("strh ");
2025 ppARM64AMode(i->ARM64in.LdSt16.amode);
2026 vex_printf(", ");
2027 ppHRegARM64(i->ARM64in.LdSt16.rD);
2028 }
2029 return;
2030 case ARM64in_LdSt8:
2031 if (i->ARM64in.LdSt8.isLoad) {
2032 vex_printf("ldrub ");
2033 ppHRegARM64(i->ARM64in.LdSt8.rD);
2034 vex_printf(", ");
2035 ppARM64AMode(i->ARM64in.LdSt8.amode);
2036 } else {
2037 vex_printf("strb ");
2038 ppARM64AMode(i->ARM64in.LdSt8.amode);
2039 vex_printf(", ");
2040 ppHRegARM64(i->ARM64in.LdSt8.rD);
2041 }
2042 return;
2043 case ARM64in_XDirect:
2044 vex_printf("(xDirect) ");
2045 vex_printf("if (%%pstate.%s) { ",
2046 showARM64CondCode(i->ARM64in.XDirect.cond));
2047 vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
2048 vex_printf("str x9,");
2049 ppARM64AMode(i->ARM64in.XDirect.amPC);
2050 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
2051 i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
2052 vex_printf("blr x9 }");
2053 return;
2054 case ARM64in_XIndir:
2055 vex_printf("(xIndir) ");
2056 vex_printf("if (%%pstate.%s) { ",
2057 showARM64CondCode(i->ARM64in.XIndir.cond));
2058 vex_printf("str ");
2059 ppHRegARM64(i->ARM64in.XIndir.dstGA);
2060 vex_printf(",");
2061 ppARM64AMode(i->ARM64in.XIndir.amPC);
2062 vex_printf("; imm64 x9,$disp_cp_xindir; ");
2063 vex_printf("br x9 }");
2064 return;
2065 case ARM64in_XAssisted:
2066 vex_printf("(xAssisted) ");
2067 vex_printf("if (%%pstate.%s) { ",
2068 showARM64CondCode(i->ARM64in.XAssisted.cond));
2069 vex_printf("str ");
2070 ppHRegARM64(i->ARM64in.XAssisted.dstGA);
2071 vex_printf(",");
2072 ppARM64AMode(i->ARM64in.XAssisted.amPC);
2073 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
2074 (Int)i->ARM64in.XAssisted.jk);
2075 vex_printf("imm64 x9,$disp_cp_xassisted; ");
2076 vex_printf("br x9 }");
2077 return;
2078 case ARM64in_CSel:
2079 vex_printf("csel ");
2080 ppHRegARM64(i->ARM64in.CSel.dst);
2081 vex_printf(", ");
2082 ppHRegARM64(i->ARM64in.CSel.argL);
2083 vex_printf(", ");
2084 ppHRegARM64(i->ARM64in.CSel.argR);
2085 vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
2086 return;
2087 case ARM64in_Call:
2088 vex_printf("call%s ",
2089 i->ARM64in.Call.cond==ARM64cc_AL
2090 ? " " : showARM64CondCode(i->ARM64in.Call.cond));
2091 vex_printf("0x%lx [nArgRegs=%d, ",
2092 i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
2093 ppRetLoc(i->ARM64in.Call.rloc);
2094 vex_printf("]");
2095 return;
2096 case ARM64in_AddToSP: {
2097 Int simm = i->ARM64in.AddToSP.simm;
2098 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
2099 simm < 0 ? -simm : simm);
2100 return;
2101 }
2102 case ARM64in_FromSP:
2103 vex_printf("mov ");
2104 ppHRegARM64(i->ARM64in.FromSP.dst);
2105 vex_printf(", xsp");
2106 return;
2107 case ARM64in_Mul:
2108 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
2109 ppHRegARM64(i->ARM64in.Mul.dst);
2110 vex_printf(", ");
2111 ppHRegARM64(i->ARM64in.Mul.argL);
2112 vex_printf(", ");
2113 ppHRegARM64(i->ARM64in.Mul.argR);
2114 return;
2115
2116 case ARM64in_LdrEX: {
2117 const HChar* sz = " ";
2118 switch (i->ARM64in.LdrEX.szB) {
2119 case 1: sz = "b"; break;
2120 case 2: sz = "h"; break;
2121 case 4: case 8: break;
2122 default: vassert(0);
2123 }
2124 vex_printf("ldxr%s %c2, [x4]",
2125 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
2126 return;
2127 }
2128 case ARM64in_StrEX: {
2129 const HChar* sz = " ";
2130 switch (i->ARM64in.StrEX.szB) {
2131 case 1: sz = "b"; break;
2132 case 2: sz = "h"; break;
2133 case 4: case 8: break;
2134 default: vassert(0);
2135 }
2136 vex_printf("stxr%s w0, %c2, [x4]",
2137 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
2138 return;
2139 }
2140 case ARM64in_MFence:
2141 vex_printf("(mfence) dsb sy; dmb sy; isb");
2142 return;
2143 //ZZ case ARM64in_CLREX:
2144 //ZZ vex_printf("clrex");
2145 //ZZ return;
2146 case ARM64in_VLdStS:
2147 if (i->ARM64in.VLdStS.isLoad) {
2148 vex_printf("ldr ");
2149 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
2150 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
2151 ppHRegARM64(i->ARM64in.VLdStS.rN);
2152 vex_printf(")");
2153 } else {
2154 vex_printf("str ");
2155 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
2156 ppHRegARM64(i->ARM64in.VLdStS.rN);
2157 vex_printf("), ");
2158 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
2159 }
2160 return;
2161 case ARM64in_VLdStD:
2162 if (i->ARM64in.VLdStD.isLoad) {
2163 vex_printf("ldr ");
2164 ppHRegARM64(i->ARM64in.VLdStD.dD);
2165 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
2166 ppHRegARM64(i->ARM64in.VLdStD.rN);
2167 vex_printf(")");
2168 } else {
2169 vex_printf("str ");
2170 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
2171 ppHRegARM64(i->ARM64in.VLdStD.rN);
2172 vex_printf("), ");
2173 ppHRegARM64(i->ARM64in.VLdStD.dD);
2174 }
2175 return;
2176 case ARM64in_VLdStQ:
2177 if (i->ARM64in.VLdStQ.isLoad)
2178 vex_printf("ld1.2d {");
2179 else
2180 vex_printf("st1.2d {");
2181 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
2182 vex_printf("}, [");
2183 ppHRegARM64(i->ARM64in.VLdStQ.rN);
2184 vex_printf("]");
2185 return;
2186 case ARM64in_VCvtI2F: {
2187 HChar syn = '?';
2188 UInt fszB = 0;
2189 UInt iszB = 0;
2190 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
2191 vex_printf("%ccvtf ", syn);
2192 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
2193 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
2194 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
2195 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
2196 return;
2197 }
2198 case ARM64in_VCvtF2I: {
2199 HChar syn = '?';
2200 UInt fszB = 0;
2201 UInt iszB = 0;
2202 HChar rmo = '?';
2203 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
2204 UChar armRM = i->ARM64in.VCvtF2I.armRM;
2205 if (armRM < 4) rmo = "npmz"[armRM];
2206 vex_printf("fcvt%c%c ", rmo, syn);
2207 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
2208 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
2209 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
2210 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
2211 return;
2212 }
2213 case ARM64in_VCvtSD:
2214 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
2215 if (i->ARM64in.VCvtSD.sToD) {
2216 ppHRegARM64(i->ARM64in.VCvtSD.dst);
2217 vex_printf(", ");
2218 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
2219 } else {
2220 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
2221 vex_printf(", ");
2222 ppHRegARM64(i->ARM64in.VCvtSD.src);
2223 }
2224 return;
2225 case ARM64in_VUnaryD:
2226 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
2227 ppHRegARM64(i->ARM64in.VUnaryD.dst);
2228 vex_printf(", ");
2229 ppHRegARM64(i->ARM64in.VUnaryD.src);
2230 return;
2231 case ARM64in_VUnaryS:
2232 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
2233 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
2234 vex_printf(", ");
2235 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
2236 return;
2237 case ARM64in_VBinD:
2238 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
2239 ppHRegARM64(i->ARM64in.VBinD.dst);
2240 vex_printf(", ");
2241 ppHRegARM64(i->ARM64in.VBinD.argL);
2242 vex_printf(", ");
2243 ppHRegARM64(i->ARM64in.VBinD.argR);
2244 return;
2245 case ARM64in_VBinS:
2246 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
2247 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
2248 vex_printf(", ");
2249 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
2250 vex_printf(", ");
2251 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
2252 return;
2253 case ARM64in_VCmpD:
2254 vex_printf("fcmp ");
2255 ppHRegARM64(i->ARM64in.VCmpD.argL);
2256 vex_printf(", ");
2257 ppHRegARM64(i->ARM64in.VCmpD.argR);
2258 return;
2259 case ARM64in_VCmpS:
2260 vex_printf("fcmp ");
2261 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
2262 vex_printf(", ");
2263 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
2264 return;
2265 case ARM64in_FPCR:
2266 if (i->ARM64in.FPCR.toFPCR) {
2267 vex_printf("msr fpcr, ");
2268 ppHRegARM64(i->ARM64in.FPCR.iReg);
2269 } else {
2270 vex_printf("mrs ");
2271 ppHRegARM64(i->ARM64in.FPCR.iReg);
2272 vex_printf(", fpcr");
2273 }
2274 return;
2275 case ARM64in_VBinV: {
2276 const HChar* nm = "??";
2277 const HChar* ar = "??";
2278 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
2279 vex_printf("%s ", nm);
2280 ppHRegARM64(i->ARM64in.VBinV.dst);
2281 vex_printf(".%s, ", ar);
2282 ppHRegARM64(i->ARM64in.VBinV.argL);
2283 vex_printf(".%s, ", ar);
2284 ppHRegARM64(i->ARM64in.VBinV.argR);
2285 vex_printf(".%s", ar);
2286 return;
2287 }
2288 case ARM64in_VUnaryV: {
2289 const HChar* nm = "??";
2290 const HChar* ar = "??";
2291 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
2292 vex_printf("%s ", nm);
2293 ppHRegARM64(i->ARM64in.VUnaryV.dst);
2294 vex_printf(".%s, ", ar);
2295 ppHRegARM64(i->ARM64in.VUnaryV.arg);
2296 vex_printf(".%s", ar);
2297 return;
2298 }
2299 case ARM64in_VNarrowV: {
2300 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
2301 const HChar* darr[3] = { "8b", "4h", "2s" };
2302 const HChar* sarr[3] = { "8h", "4s", "2d" };
2303 vex_printf("xtn ");
2304 ppHRegARM64(i->ARM64in.VNarrowV.dst);
2305 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
2306 ppHRegARM64(i->ARM64in.VNarrowV.src);
2307 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
2308 return;
2309 }
2310 case ARM64in_VShiftImmV: {
2311 const HChar* nm = "??";
2312 const HChar* ar = "??";
2313 showARM64VecShiftOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
2314 vex_printf("%s ", nm);
2315 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
2316 vex_printf(".%s, ", ar);
2317 ppHRegARM64(i->ARM64in.VShiftImmV.src);
2318 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
2319 return;
2320 }
2321 //ZZ case ARMin_VAluS:
2322 //ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
2323 //ZZ ppHRegARM(i->ARMin.VAluS.dst);
2324 //ZZ vex_printf(", ");
2325 //ZZ ppHRegARM(i->ARMin.VAluS.argL);
2326 //ZZ vex_printf(", ");
2327 //ZZ ppHRegARM(i->ARMin.VAluS.argR);
2328 //ZZ return;
2329 //ZZ case ARMin_VCMovD:
2330 //ZZ vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
2331 //ZZ ppHRegARM(i->ARMin.VCMovD.dst);
2332 //ZZ vex_printf(", ");
2333 //ZZ ppHRegARM(i->ARMin.VCMovD.src);
2334 //ZZ return;
2335 //ZZ case ARMin_VCMovS:
2336 //ZZ vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
2337 //ZZ ppHRegARM(i->ARMin.VCMovS.dst);
2338 //ZZ vex_printf(", ");
2339 //ZZ ppHRegARM(i->ARMin.VCMovS.src);
2340 //ZZ return;
2341 //ZZ case ARMin_VXferD:
2342 //ZZ vex_printf("vmov ");
2343 //ZZ if (i->ARMin.VXferD.toD) {
2344 //ZZ ppHRegARM(i->ARMin.VXferD.dD);
2345 //ZZ vex_printf(", ");
2346 //ZZ ppHRegARM(i->ARMin.VXferD.rLo);
2347 //ZZ vex_printf(", ");
2348 //ZZ ppHRegARM(i->ARMin.VXferD.rHi);
2349 //ZZ } else {
2350 //ZZ ppHRegARM(i->ARMin.VXferD.rLo);
2351 //ZZ vex_printf(", ");
2352 //ZZ ppHRegARM(i->ARMin.VXferD.rHi);
2353 //ZZ vex_printf(", ");
2354 //ZZ ppHRegARM(i->ARMin.VXferD.dD);
2355 //ZZ }
2356 //ZZ return;
2357 //ZZ case ARMin_VXferS:
2358 //ZZ vex_printf("vmov ");
2359 //ZZ if (i->ARMin.VXferS.toS) {
2360 //ZZ ppHRegARM(i->ARMin.VXferS.fD);
2361 //ZZ vex_printf(", ");
2362 //ZZ ppHRegARM(i->ARMin.VXferS.rLo);
2363 //ZZ } else {
2364 //ZZ ppHRegARM(i->ARMin.VXferS.rLo);
2365 //ZZ vex_printf(", ");
2366 //ZZ ppHRegARM(i->ARMin.VXferS.fD);
2367 //ZZ }
2368 //ZZ return;
2369 //ZZ case ARMin_VCvtID: {
2370 //ZZ const HChar* nm = "?";
2371 //ZZ if (i->ARMin.VCvtID.iToD) {
2372 //ZZ nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
2373 //ZZ } else {
2374 //ZZ nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
2375 //ZZ }
2376 //ZZ vex_printf("%s ", nm);
2377 //ZZ ppHRegARM(i->ARMin.VCvtID.dst);
2378 //ZZ vex_printf(", ");
2379 //ZZ ppHRegARM(i->ARMin.VCvtID.src);
2380 //ZZ return;
2381 //ZZ }
2382 //ZZ case ARMin_NLdStD:
2383 //ZZ if (i->ARMin.NLdStD.isLoad)
2384 //ZZ vex_printf("vld1.32 {");
2385 //ZZ else
2386 //ZZ vex_printf("vst1.32 {");
2387 //ZZ ppHRegARM(i->ARMin.NLdStD.dD);
2388 //ZZ vex_printf("} ");
2389 //ZZ ppARMAModeN(i->ARMin.NLdStD.amode);
2390 //ZZ return;
2391 //ZZ case ARMin_NUnary:
2392 //ZZ vex_printf("%s%s%s ",
2393 //ZZ showARMNeonUnOp(i->ARMin.NUnary.op),
2394 //ZZ showARMNeonUnOpDataType(i->ARMin.NUnary.op),
2395 //ZZ showARMNeonDataSize(i));
2396 //ZZ ppHRegARM(i->ARMin.NUnary.dst);
2397 //ZZ vex_printf(", ");
2398 //ZZ ppHRegARM(i->ARMin.NUnary.src);
2399 //ZZ if (i->ARMin.NUnary.op == ARMneon_EQZ)
2400 //ZZ vex_printf(", #0");
2401 //ZZ if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
2402 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
2403 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
2404 //ZZ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
2405 //ZZ vex_printf(", #%d", i->ARMin.NUnary.size);
2406 //ZZ }
2407 //ZZ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
2408 //ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
2409 //ZZ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
2410 //ZZ UInt size;
2411 //ZZ size = i->ARMin.NUnary.size;
2412 //ZZ if (size & 0x40) {
2413 //ZZ vex_printf(", #%d", size - 64);
2414 //ZZ } else if (size & 0x20) {
2415 //ZZ vex_printf(", #%d", size - 32);
2416 //ZZ } else if (size & 0x10) {
2417 //ZZ vex_printf(", #%d", size - 16);
2418 //ZZ } else if (size & 0x08) {
2419 //ZZ vex_printf(", #%d", size - 8);
2420 //ZZ }
2421 //ZZ }
2422 //ZZ return;
2423 //ZZ case ARMin_NUnaryS:
2424 //ZZ vex_printf("%s%s%s ",
2425 //ZZ showARMNeonUnOpS(i->ARMin.NUnaryS.op),
2426 //ZZ showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
2427 //ZZ showARMNeonDataSize(i));
2428 //ZZ ppARMNRS(i->ARMin.NUnaryS.dst);
2429 //ZZ vex_printf(", ");
2430 //ZZ ppARMNRS(i->ARMin.NUnaryS.src);
2431 //ZZ return;
2432 //ZZ case ARMin_NShift:
2433 //ZZ vex_printf("%s%s%s ",
2434 //ZZ showARMNeonShiftOp(i->ARMin.NShift.op),
2435 //ZZ showARMNeonShiftOpDataType(i->ARMin.NShift.op),
2436 //ZZ showARMNeonDataSize(i));
2437 //ZZ ppHRegARM(i->ARMin.NShift.dst);
2438 //ZZ vex_printf(", ");
2439 //ZZ ppHRegARM(i->ARMin.NShift.argL);
2440 //ZZ vex_printf(", ");
2441 //ZZ ppHRegARM(i->ARMin.NShift.argR);
2442 //ZZ return;
2443 //ZZ case ARMin_NShl64:
2444 //ZZ vex_printf("vshl.i64 ");
2445 //ZZ ppHRegARM(i->ARMin.NShl64.dst);
2446 //ZZ vex_printf(", ");
2447 //ZZ ppHRegARM(i->ARMin.NShl64.src);
2448 //ZZ vex_printf(", #%u", i->ARMin.NShl64.amt);
2449 //ZZ return;
2450 //ZZ case ARMin_NDual:
2451 //ZZ vex_printf("%s%s%s ",
2452 //ZZ showARMNeonDualOp(i->ARMin.NDual.op),
2453 //ZZ showARMNeonDualOpDataType(i->ARMin.NDual.op),
2454 //ZZ showARMNeonDataSize(i));
2455 //ZZ ppHRegARM(i->ARMin.NDual.arg1);
2456 //ZZ vex_printf(", ");
2457 //ZZ ppHRegARM(i->ARMin.NDual.arg2);
2458 //ZZ return;
2459 //ZZ case ARMin_NBinary:
2460 //ZZ vex_printf("%s%s%s",
2461 //ZZ showARMNeonBinOp(i->ARMin.NBinary.op),
2462 //ZZ showARMNeonBinOpDataType(i->ARMin.NBinary.op),
2463 //ZZ showARMNeonDataSize(i));
2464 //ZZ vex_printf(" ");
2465 //ZZ ppHRegARM(i->ARMin.NBinary.dst);
2466 //ZZ vex_printf(", ");
2467 //ZZ ppHRegARM(i->ARMin.NBinary.argL);
2468 //ZZ vex_printf(", ");
2469 //ZZ ppHRegARM(i->ARMin.NBinary.argR);
2470 //ZZ return;
2471 case ARM64in_VImmQ:
2472 vex_printf("qimm ");
2473 ppHRegARM64(i->ARM64in.VImmQ.rQ);
2474 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
2475 return;
2476 case ARM64in_VDfromX:
2477 vex_printf("fmov ");
2478 ppHRegARM64(i->ARM64in.VDfromX.rD);
2479 vex_printf(", ");
2480 ppHRegARM64(i->ARM64in.VDfromX.rX);
2481 return;
2482 case ARM64in_VQfromXX:
2483 vex_printf("qFromXX ");
2484 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
2485 vex_printf(", ");
2486 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
2487 vex_printf(", ");
2488 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
2489 return;
2490 case ARM64in_VXfromQ:
2491 vex_printf("mov ");
2492 ppHRegARM64(i->ARM64in.VXfromQ.rX);
2493 vex_printf(", ");
2494 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
2495 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
2496 return;
2497 case ARM64in_VMov: {
2498 UChar aux = '?';
2499 switch (i->ARM64in.VMov.szB) {
2500 case 16: aux = 'q'; break;
2501 case 8: aux = 'd'; break;
2502 case 4: aux = 's'; break;
2503 default: break;
2504 }
2505 vex_printf("mov(%c) ", aux);
2506 ppHRegARM64(i->ARM64in.VMov.dst);
2507 vex_printf(", ");
2508 ppHRegARM64(i->ARM64in.VMov.src);
2509 return;
2510 }
2511 //ZZ case ARMin_NCMovQ:
2512 //ZZ vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
2513 //ZZ ppHRegARM(i->ARMin.NCMovQ.dst);
2514 //ZZ vex_printf(", ");
2515 //ZZ ppHRegARM(i->ARMin.NCMovQ.src);
2516 //ZZ return;
2517 //ZZ case ARMin_Add32:
2518 //ZZ vex_printf("add32 ");
2519 //ZZ ppHRegARM(i->ARMin.Add32.rD);
2520 //ZZ vex_printf(", ");
2521 //ZZ ppHRegARM(i->ARMin.Add32.rN);
2522 //ZZ vex_printf(", ");
2523 //ZZ vex_printf("%d", i->ARMin.Add32.imm32);
2524 //ZZ return;
2525 case ARM64in_EvCheck:
2526 vex_printf("(evCheck) ldr w9,");
2527 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
2528 vex_printf("; subs w9,w9,$1; str w9,");
2529 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
2530 vex_printf("; bpl nofail; ldr x9,");
2531 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
2532 vex_printf("; br x9; nofail:");
2533 return;
2534 //ZZ case ARMin_ProfInc:
2535 //ZZ vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2536 //ZZ "movw r12,HI16($NotKnownYet); "
2537 //ZZ "ldr r11,[r12]; "
2538 //ZZ "adds r11,r11,$1; "
2539 //ZZ "str r11,[r12]; "
2540 //ZZ "ldr r11,[r12+4]; "
2541 //ZZ "adc r11,r11,$0; "
2542 //ZZ "str r11,[r12+4]");
2543 //ZZ return;
2544 default:
2545 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
2546 vpanic("ppARM64Instr(1)");
2547 return;
2548 }
2549 }
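/* The strings produced above are pseudo-assembly for debug output, not
   input for an assembler.  For example, following the AddToSP case,
   ARM64Instr_AddToSP(16) prints as "add xsp, xsp, #16" and
   ARM64Instr_AddToSP(-32) prints as "sub xsp, xsp, #32". */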
2550
2551
2552 /* --------- Helpers for register allocation. --------- */
2553
2554 void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
2555 {
2556 vassert(mode64 == True);
2557 initHRegUsage(u);
2558 switch (i->tag) {
2559 case ARM64in_Arith:
2560 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
2561 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
2562 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
2563 return;
2564 case ARM64in_Cmp:
2565 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
2566 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
2567 return;
2568 case ARM64in_Logic:
2569 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
2570 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
2571 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
2572 return;
2573 case ARM64in_Test:
2574 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
2575 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
2576 return;
2577 case ARM64in_Shift:
2578 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
2579 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
2580 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
2581 return;
2582 case ARM64in_Unary:
2583 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
2584 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
2585 return;
2586 case ARM64in_MovI:
2587 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
2588 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
2589 return;
2590 case ARM64in_Imm64:
2591 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
2592 return;
2593 case ARM64in_LdSt64:
2594 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
2595 if (i->ARM64in.LdSt64.isLoad) {
2596 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
2597 } else {
2598 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
2599 }
2600 return;
2601 case ARM64in_LdSt32:
2602 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
2603 if (i->ARM64in.LdSt32.isLoad) {
2604 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
2605 } else {
2606 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
2607 }
2608 return;
2609 case ARM64in_LdSt16:
2610 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
2611 if (i->ARM64in.LdSt16.isLoad) {
2612 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
2613 } else {
2614 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
2615 }
2616 return;
2617 case ARM64in_LdSt8:
2618 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
2619 if (i->ARM64in.LdSt8.isLoad) {
2620 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
2621 } else {
2622 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
2623 }
2624 return;
2625 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2626 conditionally exit the block. Hence we only need to list (1)
2627 the registers that they read, and (2) the registers that they
2628 write in the case where the block is not exited. (2) is
2629 empty, hence only (1) is relevant here. */
2630 case ARM64in_XDirect:
2631 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
2632 return;
2633 case ARM64in_XIndir:
2634 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
2635 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
2636 return;
2637 case ARM64in_XAssisted:
2638 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
2639 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
2640 return;
2641 case ARM64in_CSel:
2642 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
2643 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
2644 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
2645 return;
2646 case ARM64in_Call:
2647 /* logic and comments copied/modified from x86 back end */
2648 /* This is a bit subtle. */
2649 /* First off, claim it trashes all the caller-saved regs
2650 which fall within the register allocator's jurisdiction.
2651 These I believe to be x0 to x7. Also need to be
2652 careful about vector regs. */
2653 addHRegUse(u, HRmWrite, hregARM64_X0());
2654 addHRegUse(u, HRmWrite, hregARM64_X1());
2655 addHRegUse(u, HRmWrite, hregARM64_X2());
2656 addHRegUse(u, HRmWrite, hregARM64_X3());
2657 addHRegUse(u, HRmWrite, hregARM64_X4());
2658 addHRegUse(u, HRmWrite, hregARM64_X5());
2659 addHRegUse(u, HRmWrite, hregARM64_X6());
2660 addHRegUse(u, HRmWrite, hregARM64_X7());
2661 addHRegUse(u, HRmWrite, hregARM64_Q16());
2662 addHRegUse(u, HRmWrite, hregARM64_Q17());
2663 addHRegUse(u, HRmWrite, hregARM64_Q18());
2664 /* Now we have to state any parameter-carrying registers
2665 which might be read. This depends on nArgRegs. */
2666 switch (i->ARM64in.Call.nArgRegs) {
2667 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2668 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2669 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2670 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2671 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2672 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2673 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2674 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2675 case 0: break;
2676 default: vpanic("getRegUsage_ARM64:Call:regparms");
2677 }
2678 /* Finally, there is the issue that the insn trashes a
2679 register because the literal target address has to be
2680 loaded into a register. However, we reserve x9 for that
2681 purpose so there's no further complexity here. Stating x9
2682 as trashed is pointless since it's not under the control
2683 of the allocator, but what the hell. */
2684 addHRegUse(u, HRmWrite, hregARM64_X9());
2685 return;
2686 case ARM64in_AddToSP:
2687 /* Only changes SP, but regalloc doesn't control that, hence
2688 we don't care. */
2689 return;
2690 case ARM64in_FromSP:
2691 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2692 return;
2693 case ARM64in_Mul:
2694 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2695 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2696 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2697 return;
2698 case ARM64in_LdrEX:
2699 addHRegUse(u, HRmRead, hregARM64_X4());
2700 addHRegUse(u, HRmWrite, hregARM64_X2());
2701 return;
2702 case ARM64in_StrEX:
2703 addHRegUse(u, HRmRead, hregARM64_X4());
2704 addHRegUse(u, HRmWrite, hregARM64_X0());
2705 addHRegUse(u, HRmRead, hregARM64_X2());
2706 return;
2707 case ARM64in_MFence:
2708 return;
2709 //ZZ case ARMin_CLREX:
2710 //ZZ return;
2711 case ARM64in_VLdStS:
2712 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2713 if (i->ARM64in.VLdStS.isLoad) {
2714 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2715 } else {
2716 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2717 }
2718 return;
2719 case ARM64in_VLdStD:
2720 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2721 if (i->ARM64in.VLdStD.isLoad) {
2722 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2723 } else {
2724 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2725 }
2726 return;
2727 case ARM64in_VLdStQ:
2728 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2729 if (i->ARM64in.VLdStQ.isLoad)
2730 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2731 else
2732 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2733 return;
2734 case ARM64in_VCvtI2F:
2735 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2736 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2737 return;
2738 case ARM64in_VCvtF2I:
2739 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2740 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2741 return;
2742 case ARM64in_VCvtSD:
2743 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2744 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2745 return;
2746 case ARM64in_VUnaryD:
2747 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2748 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2749 return;
2750 case ARM64in_VUnaryS:
2751 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2752 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2753 return;
2754 case ARM64in_VBinD:
2755 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2756 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2757 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2758 return;
2759 case ARM64in_VBinS:
2760 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2761 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2762 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2763 return;
2764 case ARM64in_VCmpD:
2765 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2766 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2767 return;
2768 case ARM64in_VCmpS:
2769 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2770 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2771 return;
2772 case ARM64in_FPCR:
2773 if (i->ARM64in.FPCR.toFPCR)
2774 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2775 else
2776 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2777 return;
2778 case ARM64in_VBinV:
2779 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2780 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2781 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2782 return;
2783 case ARM64in_VUnaryV:
2784 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2785 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2786 return;
2787 case ARM64in_VNarrowV:
2788 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2789 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2790 return;
2791 case ARM64in_VShiftImmV:
2792 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2793 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2794 return;
2795 //ZZ case ARMin_VAluS:
2796 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2797 //ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2798 //ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2799 //ZZ return;
2800 //ZZ case ARMin_VUnaryS:
2801 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2802 //ZZ addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2803 //ZZ return;
2804 //ZZ case ARMin_VCMovD:
2805 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2806 //ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2807 //ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2808 //ZZ return;
2809 //ZZ case ARMin_VCMovS:
2810 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2811 //ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2812 //ZZ addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2813 //ZZ return;
2814 //ZZ case ARMin_VXferD:
2815 //ZZ if (i->ARMin.VXferD.toD) {
2816 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2817 //ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2818 //ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2819 //ZZ } else {
2820 //ZZ addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2821 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2822 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2823 //ZZ }
2824 //ZZ return;
2825 //ZZ case ARMin_VXferS:
2826 //ZZ if (i->ARMin.VXferS.toS) {
2827 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2828 //ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2829 //ZZ } else {
2830 //ZZ addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2831 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2832 //ZZ }
2833 //ZZ return;
2834 //ZZ case ARMin_VCvtID:
2835 //ZZ addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2836 //ZZ addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2837 //ZZ return;
2838 //ZZ case ARMin_NLdStD:
2839 //ZZ if (i->ARMin.NLdStD.isLoad)
2840 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2841 //ZZ else
2842 //ZZ addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2843 //ZZ addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2844 //ZZ return;
2845 //ZZ case ARMin_NUnary:
2846 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2847 //ZZ addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2848 //ZZ return;
2849 //ZZ case ARMin_NUnaryS:
2850 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2851 //ZZ addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2852 //ZZ return;
2853 //ZZ case ARMin_NShift:
2854 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2855 //ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2856 //ZZ addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2857 //ZZ return;
2858 //ZZ case ARMin_NShl64:
2859 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2860 //ZZ addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2861 //ZZ return;
2862 //ZZ case ARMin_NDual:
2863 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2864 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2865 //ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2866 //ZZ addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2867 //ZZ return;
2868 case ARM64in_VImmQ:
2869 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2870 return;
2871 case ARM64in_VDfromX:
2872 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2873 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2874 return;
2875 case ARM64in_VQfromXX:
2876 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2877 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2878 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2879 return;
2880 case ARM64in_VXfromQ:
2881 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2882 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2883 return;
2884 case ARM64in_VMov:
2885 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2886 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2887 return;
2888 //ZZ case ARMin_NBinary:
2889 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2890 //ZZ /* TODO: sometimes dst is also being read! */
2891 //ZZ // XXX fix this
2892 //ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2893 //ZZ addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2894 //ZZ return;
2895 //ZZ case ARMin_NCMovQ:
2896 //ZZ addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2897 //ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2898 //ZZ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2899 //ZZ return;
2900 //ZZ case ARMin_Add32:
2901 //ZZ addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2902 //ZZ addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2903 //ZZ return;
2904 case ARM64in_EvCheck:
2905 /* We expect both amodes only to mention x21, so this is in
2906 fact pointless, since x21 isn't allocatable, but
2907 anyway.. */
2908 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2909 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2910 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2911 return;
2912 //ZZ case ARMin_ProfInc:
2913 //ZZ addHRegUse(u, HRmWrite, hregARM_R12());
2914 //ZZ addHRegUse(u, HRmWrite, hregARM_R11());
2915 //ZZ return;
2916 default:
2917 ppARM64Instr(i);
2918 vpanic("getRegUsage_ARM64Instr");
2919 }
2920 }
2921
2922
2923 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2924 {
2925 vassert(mode64 == True);
2926 switch (i->tag) {
2927 case ARM64in_Arith:
2928 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2929 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2930 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2931 return;
2932 case ARM64in_Cmp:
2933 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2934 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2935 return;
2936 case ARM64in_Logic:
2937 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2938 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2939 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2940 return;
2941 case ARM64in_Test:
2942 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2943 mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2944 return;
2945 case ARM64in_Shift:
2946 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2947 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2948 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2949 return;
2950 case ARM64in_Unary:
2951 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2952 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2953 return;
2954 case ARM64in_MovI:
2955 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2956 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2957 return;
2958 case ARM64in_Imm64:
2959 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2960 return;
2961 case ARM64in_LdSt64:
2962 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2963 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2964 return;
2965 case ARM64in_LdSt32:
2966 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2967 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2968 return;
2969 case ARM64in_LdSt16:
2970 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2971 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2972 return;
2973 case ARM64in_LdSt8:
2974 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2975 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2976 return;
2977 case ARM64in_XDirect:
2978 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2979 return;
2980 case ARM64in_XIndir:
2981 i->ARM64in.XIndir.dstGA
2982 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2983 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2984 return;
2985 case ARM64in_XAssisted:
2986 i->ARM64in.XAssisted.dstGA
2987 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2988 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2989 return;
2990 case ARM64in_CSel:
2991 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2992 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2993 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2994 return;
2995 case ARM64in_Call:
2996 return;
2997 case ARM64in_AddToSP:
2998 return;
2999 case ARM64in_FromSP:
3000 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
3001 return;
3002 case ARM64in_Mul:
3003 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
3004 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
3005 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
3006 return;
3007 case ARM64in_LdrEX:
3008 return;
3009 case ARM64in_StrEX:
3010 return;
3011 case ARM64in_MFence:
3012 return;
3013 //ZZ case ARMin_CLREX:
3014 //ZZ return;
3015 case ARM64in_VLdStS:
3016 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
3017 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
3018 return;
3019 case ARM64in_VLdStD:
3020 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
3021 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
3022 return;
3023 case ARM64in_VLdStQ:
3024 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
3025 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
3026 return;
3027 case ARM64in_VCvtI2F:
3028 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
3029 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
3030 return;
3031 case ARM64in_VCvtF2I:
3032 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
3033 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
3034 return;
3035 case ARM64in_VCvtSD:
3036 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
3037 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
3038 return;
3039 case ARM64in_VUnaryD:
3040 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
3041 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
3042 return;
3043 case ARM64in_VUnaryS:
3044 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
3045 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
3046 return;
3047 case ARM64in_VBinD:
3048 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
3049 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
3050 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
3051 return;
3052 case ARM64in_VBinS:
3053 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
3054 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
3055 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
3056 return;
3057 case ARM64in_VCmpD:
3058 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
3059 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
3060 return;
3061 case ARM64in_VCmpS:
3062 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
3063 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
3064 return;
3065 case ARM64in_FPCR:
3066 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
3067 return;
3068 case ARM64in_VBinV:
3069 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
3070 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
3071 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
3072 return;
3073 case ARM64in_VUnaryV:
3074 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
3075 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
3076 return;
3077 case ARM64in_VNarrowV:
3078 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
3079 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
3080 return;
3081 case ARM64in_VShiftImmV:
3082 i->ARM64in.VShiftImmV.dst
3083 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
3084 i->ARM64in.VShiftImmV.src
3085 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
3086 return;
3087 //ZZ case ARMin_VAluS:
3088 //ZZ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
3089 //ZZ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
3090 //ZZ i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
3091 //ZZ return;
3092 //ZZ case ARMin_VCMovD:
3093 //ZZ i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
3094 //ZZ i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
3095 //ZZ return;
3096 //ZZ case ARMin_VCMovS:
3097 //ZZ i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
3098 //ZZ i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
3099 //ZZ return;
3100 //ZZ case ARMin_VXferD:
3101 //ZZ i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
3102 //ZZ i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
3103 //ZZ i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
3104 //ZZ return;
3105 //ZZ case ARMin_VXferS:
3106 //ZZ i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
3107 //ZZ i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
3108 //ZZ return;
3109 //ZZ case ARMin_VCvtID:
3110 //ZZ i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
3111 //ZZ i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
3112 //ZZ return;
3113 //ZZ case ARMin_NLdStD:
3114 //ZZ i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
3115 //ZZ mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
3116 //ZZ return;
3117 //ZZ case ARMin_NUnary:
3118 //ZZ i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
3119 //ZZ i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
3120 //ZZ return;
3121 //ZZ case ARMin_NUnaryS:
3122 //ZZ i->ARMin.NUnaryS.src->reg
3123 //ZZ = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
3124 //ZZ i->ARMin.NUnaryS.dst->reg
3125 //ZZ = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
3126 //ZZ return;
3127 //ZZ case ARMin_NShift:
3128 //ZZ i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
3129 //ZZ i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
3130 //ZZ i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
3131 //ZZ return;
3132 //ZZ case ARMin_NShl64:
3133 //ZZ i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
3134 //ZZ i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
3135 //ZZ return;
3136 //ZZ case ARMin_NDual:
3137 //ZZ i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
3138 //ZZ i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
3139 //ZZ return;
3140 case ARM64in_VImmQ:
3141 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
3142 return;
3143 case ARM64in_VDfromX:
3144 i->ARM64in.VDfromX.rD
3145 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
3146 i->ARM64in.VDfromX.rX
3147 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
3148 return;
3149 case ARM64in_VQfromXX:
3150 i->ARM64in.VQfromXX.rQ
3151 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
3152 i->ARM64in.VQfromXX.rXhi
3153 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
3154 i->ARM64in.VQfromXX.rXlo
3155 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
3156 return;
3157 case ARM64in_VXfromQ:
3158 i->ARM64in.VXfromQ.rX
3159 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
3160 i->ARM64in.VXfromQ.rQ
3161 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
3162 return;
3163 case ARM64in_VMov:
3164 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
3165 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
3166 return;
3167
3168 //ZZ case ARMin_NBinary:
3169 //ZZ i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
3170 //ZZ i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
3171 //ZZ i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
3172 //ZZ return;
3173 //ZZ case ARMin_NCMovQ:
3174 //ZZ i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
3175 //ZZ i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
3176 //ZZ return;
3177 //ZZ case ARMin_Add32:
3178 //ZZ i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
3179 //ZZ i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
3180 //ZZ return;
3181 case ARM64in_EvCheck:
3182 /* We expect both amodes only to mention x21, so this is in
3183 fact pointless, since x21 isn't allocatable, but
3184 anyway.. */
3185 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
3186 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
3187 return;
3188 //ZZ case ARMin_ProfInc:
3189 //ZZ /* hardwires r11 and r12 -- nothing to modify. */
3190 //ZZ return;
3191 default:
3192 ppARM64Instr(i);
3193 vpanic("mapRegs_ARM64Instr");
3194 }
3195 }
3196
3197 /* Figure out if i represents a reg-reg move, and if so assign the
3198 source and destination to *src and *dst. If in doubt say No. Used
3199 by the register allocator to do move coalescing.
3200 */
3201 Bool isMove_ARM64Instr ( ARM64Instr* i, HReg* src, HReg* dst )
3202 {
3203 switch (i->tag) {
3204 case ARM64in_MovI:
3205 *src = i->ARM64in.MovI.src;
3206 *dst = i->ARM64in.MovI.dst;
3207 return True;
3208 case ARM64in_VMov:
3209 *src = i->ARM64in.VMov.src;
3210 *dst = i->ARM64in.VMov.dst;
3211 return True;
3212 default:
3213 break;
3214 }
3215
3216 return False;
3217 }
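
/* Illustrative note (not from the original source): the moves recognised
   here are the plain register-to-register copies ARM64in_MovI (X regs)
   and ARM64in_VMov (FP/vector regs).  Reporting src/dst lets the
   register allocator try to give both virtual registers the same real
   register, after which the copy instruction can be dropped. */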
3218
3219
3220 /* Generate arm spill/reload instructions under the direction of the
3221 register allocator. Note it's critical these don't write the
3222 condition codes. */
3223
3224 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
3225 HReg rreg, Int offsetB, Bool mode64 )
3226 {
3227 HRegClass rclass;
3228 vassert(offsetB >= 0);
3229 vassert(!hregIsVirtual(rreg));
3230 vassert(mode64 == True);
3231 *i1 = *i2 = NULL;
3232 rclass = hregClass(rreg);
3233 switch (rclass) {
3234 case HRcInt64:
3235 vassert(0 == (offsetB & 7));
3236 offsetB >>= 3;
3237 vassert(offsetB < 4096);
3238 *i1 = ARM64Instr_LdSt64(
3239 False/*!isLoad*/,
3240 rreg,
3241 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
3242 );
3243 return;
3244 case HRcFlt64:
3245 vassert(0 == (offsetB & 7));
3246 vassert(offsetB >= 0 && offsetB < 32768);
3247 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
3248 rreg, hregARM64_X21(), offsetB);
3249 return;
3250 case HRcVec128: {
3251 HReg x21 = hregARM64_X21(); // baseblock
3252 HReg x9 = hregARM64_X9(); // spill temporary
3253 vassert(0 == (offsetB & 15)); // check sane alignment
3254 vassert(offsetB < 4096);
3255 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
3256 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
3257 return;
3258 }
3259 default:
3260 ppHRegClass(rclass);
3261          vpanic("genSpill_ARM64: unimplemented regclass");
3262 }
3263 }
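
/* Illustrative examples (not from the original source), with the guest
   state (baseblock) pointer in x21 as assumed throughout this file:
     HRcInt64,  offsetB = 24 :  str x<r>, [x21, #24]          (1 insn)
     HRcFlt64,  offsetB = 24 :  str d<r>, [x21, #24]          (1 insn)
     HRcVec128, offsetB = 32 :  add x9, x21, #32
                                st1 {v<r>.2d}, [x9]           (2 insns)
   x9 can be trashed here because it is reserved as a spill temporary
   and is not offered to the allocator. */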
3264
3265 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
3266 HReg rreg, Int offsetB, Bool mode64 )
3267 {
3268 HRegClass rclass;
3269 vassert(offsetB >= 0);
3270 vassert(!hregIsVirtual(rreg));
3271 vassert(mode64 == True);
3272 *i1 = *i2 = NULL;
3273 rclass = hregClass(rreg);
3274 switch (rclass) {
3275 case HRcInt64:
3276 vassert(0 == (offsetB & 7));
3277 offsetB >>= 3;
3278 vassert(offsetB < 4096);
3279 *i1 = ARM64Instr_LdSt64(
3280 True/*isLoad*/,
3281 rreg,
3282 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
3283 );
3284 return;
3285 case HRcFlt64:
3286 vassert(0 == (offsetB & 7));
3287 vassert(offsetB >= 0 && offsetB < 32768);
3288 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
3289 rreg, hregARM64_X21(), offsetB);
3290 return;
3291 case HRcVec128: {
3292 HReg x21 = hregARM64_X21(); // baseblock
3293 HReg x9 = hregARM64_X9(); // spill temporary
3294 vassert(0 == (offsetB & 15)); // check sane alignment
3295 vassert(offsetB < 4096);
3296 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
3297 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
3298 return;
3299 }
3300 default:
3301 ppHRegClass(rclass);
3302          vpanic("genReload_ARM64: unimplemented regclass");
3303 }
3304 }
3305
3306
3307 //ZZ /* Emit an instruction into buf and return the number of bytes used.
3308 //ZZ Note that buf is not the insn's final place, and therefore it is
3309 //ZZ imperative to emit position-independent code. */
3310
3311 static inline UChar iregNo ( HReg r )
3312 {
3313 UInt n;
3314 vassert(hregClass(r) == HRcInt64);
3315 vassert(!hregIsVirtual(r));
3316 n = hregNumber(r);
3317 vassert(n <= 30);
3318 return toUChar(n);
3319 }
3320
3321 static inline UChar dregNo ( HReg r )
3322 {
3323 UInt n;
3324 vassert(hregClass(r) == HRcFlt64 || hregClass(r) == HRcInt64);
3325 vassert(!hregIsVirtual(r));
3326 n = hregNumber(r);
3327 vassert(n <= 31);
3328 return toUChar(n);
3329 }
3330
3331 static inline UChar qregNo ( HReg r )
3332 {
3333 UInt n;
3334 vassert(hregClass(r) == HRcVec128);
3335 vassert(!hregIsVirtual(r));
3336 n = hregNumber(r);
3337 vassert(n <= 31);
3338 return toUChar(n);
3339 }
3340
3341 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
3342 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
3343
3344 #define X00 BITS4(0,0, 0,0)
3345 #define X01 BITS4(0,0, 0,1)
3346 #define X10 BITS4(0,0, 1,0)
3347 #define X11 BITS4(0,0, 1,1)
3348
3349 #define X000 BITS4(0, 0,0,0)
3350 #define X001 BITS4(0, 0,0,1)
3351 #define X010 BITS4(0, 0,1,0)
3352 #define X011 BITS4(0, 0,1,1)
3353 #define X100 BITS4(0, 1,0,0)
3354 #define X101 BITS4(0, 1,0,1)
3355 #define X110 BITS4(0, 1,1,0)
3356 #define X111 BITS4(0, 1,1,1)
3357
3358 #define X0000 BITS4(0,0,0,0)
3359 #define X0001 BITS4(0,0,0,1)
3360 #define X0010 BITS4(0,0,1,0)
3361 #define X0011 BITS4(0,0,1,1)
3362
3363 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
3364 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
3365
3366 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
3367 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
3368 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
3369 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
3370 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
3371 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
3372 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
3373 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
3374
3375 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
3376 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
3377 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
3378 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
3379 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
3380 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
3381 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
3382 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
3383 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
3384 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
3385 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
3386 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
3387 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
3388 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
3389 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
3390 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
3391 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
3392 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
3393 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
3394 #define X100000 BITS8(0,0, 1,0,0,0,0,0)
3395 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
3396 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
3397 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
3398 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
3399 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
3400 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
3401 #define X101000 BITS8(0,0, 1,0,1,0,0,0)
3402 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
3403 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
3404 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
3405 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
3406 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
3407 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
3408 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
3409 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
3410 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
3411
3412 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
3413 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
3414 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
3415 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
3416
3417 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
3418 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
3419 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
3420 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
3421 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
3422 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
3423 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
3424 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
3425 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
3426 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
3427 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
3428 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
3429 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
3430 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
3431 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
3432 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
3433 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
3434 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
3435 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
3436 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
3437 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
3438 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
3439 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
3440 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
3441 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
3442 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
3443
3444 #define BITS9(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
3445 ((BITS8(zzb8,zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1) << 1) | zzb0)
3446
3447 #define X111100111 BITS9(1,1,1,1,0,0,1,1,1)
3448 #define X111100101 BITS9(1,1,1,1,0,0,1,0,1)
3449
3450
3451 /* --- 4 fields --- */
3452
3453 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
3454 vassert(8+19+1+4 == 32);
3455 vassert(f1 < (1<<8));
3456 vassert(f2 < (1<<19));
3457 vassert(f3 < (1<<1));
3458 vassert(f4 < (1<<4));
3459 UInt w = 0;
3460 w = (w << 8) | f1;
3461 w = (w << 19) | f2;
3462 w = (w << 1) | f3;
3463 w = (w << 4) | f4;
3464 return w;
3465 }
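
/* Worked example (illustrative, not from the original source): the
   conditional branches patched in by the emitter below are built as
   X_8_19_1_4(X01010100, simm19, 0, cond).  For instance
   X_8_19_1_4(X01010100, 2, 0, 0) == 0x54000040, which is B.EQ .+8
   (cond 0000 = EQ, byte offset = simm19 << 2). */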
3466
3467 /* --- 5 fields --- */
3468
3469 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
3470 UInt f3, UInt f4, UInt f5 ) {
3471 vassert(3+6+2+16+5 == 32);
3472 vassert(f1 < (1<<3));
3473 vassert(f2 < (1<<6));
3474 vassert(f3 < (1<<2));
3475 vassert(f4 < (1<<16));
3476 vassert(f5 < (1<<5));
3477 UInt w = 0;
3478 w = (w << 3) | f1;
3479 w = (w << 6) | f2;
3480 w = (w << 2) | f3;
3481 w = (w << 16) | f4;
3482 w = (w << 5) | f5;
3483 return w;
3484 }
3485
3486 /* --- 6 fields --- */
3487
3488 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
3489 UInt f4, UInt f5, UInt f6 ) {
3490 vassert(2+6+2+12+5+5 == 32);
3491 vassert(f1 < (1<<2));
3492 vassert(f2 < (1<<6));
3493 vassert(f3 < (1<<2));
3494 vassert(f4 < (1<<12));
3495 vassert(f5 < (1<<5));
3496 vassert(f6 < (1<<5));
3497 UInt w = 0;
3498 w = (w << 2) | f1;
3499 w = (w << 6) | f2;
3500 w = (w << 2) | f3;
3501 w = (w << 12) | f4;
3502 w = (w << 5) | f5;
3503 w = (w << 5) | f6;
3504 return w;
3505 }
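
/* Worked example (illustrative, not from the original source): the
   ADD-immediate form used below for spill address computation is
   X_2_6_2_12_5_5(X10, X010001, X00, imm12, rN, rD), e.g.
   X_2_6_2_12_5_5(X10, X010001, X00, 1, 21, 9) == 0x910006A9,
   i.e. ADD x9, x21, #1. */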
3506
3507 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
3508 UInt f4, UInt f5, UInt f6 ) {
3509 vassert(3+8+5+6+5+5 == 32);
3510 vassert(f1 < (1<<3));
3511 vassert(f2 < (1<<8));
3512 vassert(f3 < (1<<5));
3513 vassert(f4 < (1<<6));
3514 vassert(f5 < (1<<5));
3515 vassert(f6 < (1<<5));
3516 UInt w = 0;
3517 w = (w << 3) | f1;
3518 w = (w << 8) | f2;
3519 w = (w << 5) | f3;
3520 w = (w << 6) | f4;
3521 w = (w << 5) | f5;
3522 w = (w << 5) | f6;
3523 return w;
3524 }
3525
3526 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
3527 UInt f4, UInt f5, UInt f6 ) {
3528    vassert(3+5+8+6+5+5 == 32);
3529 vassert(f1 < (1<<3));
3530 vassert(f2 < (1<<5));
3531 vassert(f3 < (1<<8));
3532 vassert(f4 < (1<<6));
3533 vassert(f5 < (1<<5));
3534 vassert(f6 < (1<<5));
3535 UInt w = 0;
3536 w = (w << 3) | f1;
3537 w = (w << 5) | f2;
3538 w = (w << 8) | f3;
3539 w = (w << 6) | f4;
3540 w = (w << 5) | f5;
3541 w = (w << 5) | f6;
3542 return w;
3543 }
3544
3545 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
3546 UInt f4, UInt f5, UInt f6 ) {
3547 vassert(3+6+7+6+5+5 == 32);
3548 vassert(f1 < (1<<3));
3549 vassert(f2 < (1<<6));
3550 vassert(f3 < (1<<7));
3551 vassert(f4 < (1<<6));
3552 vassert(f5 < (1<<5));
3553 vassert(f6 < (1<<5));
3554 UInt w = 0;
3555 w = (w << 3) | f1;
3556 w = (w << 6) | f2;
3557 w = (w << 7) | f3;
3558 w = (w << 6) | f4;
3559 w = (w << 5) | f5;
3560 w = (w << 5) | f6;
3561 return w;
3562 }
3563
3564 /* --- 7 fields --- */
3565
3566 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
3567 UInt f4, UInt f5, UInt f6, UInt f7 ) {
3568 vassert(2+6+3+9+2+5+5 == 32);
3569 vassert(f1 < (1<<2));
3570 vassert(f2 < (1<<6));
3571 vassert(f3 < (1<<3));
3572 vassert(f4 < (1<<9));
3573 vassert(f5 < (1<<2));
3574 vassert(f6 < (1<<5));
3575 vassert(f7 < (1<<5));
3576 UInt w = 0;
3577 w = (w << 2) | f1;
3578 w = (w << 6) | f2;
3579 w = (w << 3) | f3;
3580 w = (w << 9) | f4;
3581 w = (w << 2) | f5;
3582 w = (w << 5) | f6;
3583 w = (w << 5) | f7;
3584 return w;
3585 }
3586
3587 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
3588 UInt f4, UInt f5, UInt f6, UInt f7 ) {
3589 vassert(3+6+1+6+6+5+5 == 32);
3590 vassert(f1 < (1<<3));
3591 vassert(f2 < (1<<6));
3592 vassert(f3 < (1<<1));
3593 vassert(f4 < (1<<6));
3594 vassert(f5 < (1<<6));
3595 vassert(f6 < (1<<5));
3596 vassert(f7 < (1<<5));
3597 UInt w = 0;
3598 w = (w << 3) | f1;
3599 w = (w << 6) | f2;
3600 w = (w << 1) | f3;
3601 w = (w << 6) | f4;
3602 w = (w << 6) | f5;
3603 w = (w << 5) | f6;
3604 w = (w << 5) | f7;
3605 return w;
3606 }
3607
3608 static inline UInt X_9_1_6_4_6_1_1_4( UInt f1, UInt f2, UInt f3, UInt f4,
3609 UInt f5, UInt f6, UInt f7, UInt f8) {
3610 vassert(9+1+6+4+6+1+1+4 == 32);
3611 vassert(f1 < (1<<9));
3612 vassert(f2 < (1<<1));
3613 vassert(f3 < (1<<6));
3614 vassert(f4 < (1<<4));
3615 vassert(f5 < (1<<6));
3616 vassert(f6 < (1<<1));
3617 vassert(f7 < (1<<1));
3618    vassert(f8 < (1<<4));
3619 UInt w = 0;
3620 w = (w << 9) | f1;
3621 w = (w << 1) | f2;
3622 w = (w << 6) | f3;
3623 w = (w << 4) | f4;
3624 w = (w << 6) | f5;
3625 w = (w << 1) | f6;
3626 w = (w << 1) | f7;
3627 w = (w << 4) | f8;
3628 return w;
3629 }
3630
3631
3632 //ZZ #define X0000 BITS4(0,0,0,0)
3633 //ZZ #define X0001 BITS4(0,0,0,1)
3634 //ZZ #define X0010 BITS4(0,0,1,0)
3635 //ZZ #define X0011 BITS4(0,0,1,1)
3636 //ZZ #define X0100 BITS4(0,1,0,0)
3637 //ZZ #define X0101 BITS4(0,1,0,1)
3638 //ZZ #define X0110 BITS4(0,1,1,0)
3639 //ZZ #define X0111 BITS4(0,1,1,1)
3640 //ZZ #define X1000 BITS4(1,0,0,0)
3641 //ZZ #define X1001 BITS4(1,0,0,1)
3642 //ZZ #define X1010 BITS4(1,0,1,0)
3643 //ZZ #define X1011 BITS4(1,0,1,1)
3644 //ZZ #define X1100 BITS4(1,1,0,0)
3645 //ZZ #define X1101 BITS4(1,1,0,1)
3646 //ZZ #define X1110 BITS4(1,1,1,0)
3647 //ZZ #define X1111 BITS4(1,1,1,1)
3648 /*
3649 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
3650 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3651 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3652 (((zzx3) & 0xF) << 12))
3653
3654 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
3655 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3656 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3657 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
3658
3659 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
3660 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3661 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3662 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
3663
3664 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
3665 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3666 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
3667 (((zzx0) & 0xF) << 0))
3668
3669 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
3670 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3671 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3672 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
3673 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
3674
3675 #define XX______(zzx7,zzx6) \
3676 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
3677 */
3678 //ZZ /* Generate a skeletal insn that involves an a RI84 shifter operand.
3679 //ZZ Returns a word which is all zeroes apart from bits 25 and 11..0,
3680 //ZZ since it is those that encode the shifter operand (at least to the
3681 //ZZ extent that we care about it.) */
3682 //ZZ static UInt skeletal_RI84 ( ARMRI84* ri )
3683 //ZZ {
3684 //ZZ UInt instr;
3685 //ZZ if (ri->tag == ARMri84_I84) {
3686 //ZZ vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
3687 //ZZ vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
3688 //ZZ instr = 1 << 25;
3689 //ZZ instr |= (ri->ARMri84.I84.imm4 << 8);
3690 //ZZ instr |= ri->ARMri84.I84.imm8;
3691 //ZZ } else {
3692 //ZZ instr = 0 << 25;
3693 //ZZ instr |= iregNo(ri->ARMri84.R.reg);
3694 //ZZ }
3695 //ZZ return instr;
3696 //ZZ }
3697 //ZZ
3698 //ZZ /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
3699 //ZZ 11..7. */
3700 //ZZ static UInt skeletal_RI5 ( ARMRI5* ri )
3701 //ZZ {
3702 //ZZ UInt instr;
3703 //ZZ if (ri->tag == ARMri5_I5) {
3704 //ZZ UInt imm5 = ri->ARMri5.I5.imm5;
3705 //ZZ vassert(imm5 >= 1 && imm5 <= 31);
3706 //ZZ instr = 0 << 4;
3707 //ZZ instr |= imm5 << 7;
3708 //ZZ } else {
3709 //ZZ instr = 1 << 4;
3710 //ZZ instr |= iregNo(ri->ARMri5.R.reg) << 8;
3711 //ZZ }
3712 //ZZ return instr;
3713 //ZZ }
3714
3715
3716 /* Get an immediate into a register, using only that register. */
3717 static UInt* imm64_to_iregNo ( UInt* p, Int xD, ULong imm64 )
3718 {
3719 if (imm64 == 0) {
3720 // This has to be special-cased, since the logic below
3721 // will leave the register unchanged in this case.
3722 // MOVZ xD, #0, LSL #0
3723 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
3724 return p;
3725 }
3726
3727 // There must be at least one non-zero halfword. Find the
3728 // lowest nonzero such, and use MOVZ to install it and zero
3729 // out the rest of the register.
3730 UShort h[4];
3731 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3732 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3733 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3734 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3735
3736 UInt i;
3737 for (i = 0; i < 4; i++) {
3738 if (h[i] != 0)
3739 break;
3740 }
3741 vassert(i < 4);
3742
3743 // MOVZ xD, h[i], LSL (16*i)
3744 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3745
3746 // Work on upwards through h[i], using MOVK to stuff in any
3747 // remaining nonzero elements.
3748 i++;
3749 for (; i < 4; i++) {
3750 if (h[i] == 0)
3751 continue;
3752 // MOVK xD, h[i], LSL (16*i)
3753 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3754 }
3755
3756 return p;
3757 }
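
/* Worked example (illustrative, not from the original source):
   imm64 = 0x0000444400002222 gives h[0]=0x2222, h[1]=0, h[2]=0x4444,
   h[3]=0, so the code above emits only two instructions:
      movz xD, #0x2222, lsl #0
      movk xD, #0x4444, lsl #32
   the remaining halfwords having been zeroed by the MOVZ. */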
3758
3759 /* Get an immediate into a register, using only that register, and
3760 generating exactly 4 instructions, regardless of the value of the
3761 immediate. This is used when generating sections of code that need
3762 to be patched later, so as to guarantee a specific size. */
3763 static UInt* imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3764 {
3765 UShort h[4];
3766 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3767 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3768 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3769 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3770 // Work on upwards through h[i], using MOVK to stuff in the
3771 // remaining elements.
3772 UInt i;
3773 for (i = 0; i < 4; i++) {
3774 if (i == 0) {
3775 // MOVZ xD, h[0], LSL (16*0)
3776 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3777 } else {
3778 // MOVK xD, h[i], LSL (16*i)
3779 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3780 }
3781 }
3782 return p;
3783 }
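
/* Note (illustrative): unlike imm64_to_iregNo above, this variant emits
   MOVZ plus three MOVKs even when most halfwords are zero, so every
   patchable load-immediate site is exactly 16 bytes long.  That fixed
   size is what the chaining/unchaining code relies on when it later
   rewrites the target address in place. */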
3784
3785 /* Check whether p points at a 4-insn sequence cooked up by
3786 imm64_to_iregNo_EXACTLY4(). */
3787 static Bool is_imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3788 {
3789 UShort h[4];
3790 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3791 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3792 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3793 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3794 // Work on upwards through h[i], using MOVK to stuff in the
3795 // remaining elements.
3796 UInt i;
3797 for (i = 0; i < 4; i++) {
3798 UInt expected;
3799 if (i == 0) {
3800 // MOVZ xD, h[0], LSL (16*0)
3801 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3802 } else {
3803 // MOVK xD, h[i], LSL (16*i)
3804 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3805 }
3806 if (p[i] != expected)
3807 return False;
3808 }
3809 return True;
3810 }
3811
3812
3813 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
3814 rD, using the given amode for the address. */
3815 static UInt* do_load_or_store8 ( UInt* p,
3816 Bool isLoad, UInt wD, ARM64AMode* am )
3817 {
3818 vassert(wD <= 30);
3819 if (am->tag == ARM64am_RI9) {
3820 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3821 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3822 */
3823 Int simm9 = am->ARM64am.RI9.simm9;
3824 vassert(-256 <= simm9 && simm9 <= 255);
3825 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3826 simm9 & 0x1FF, X00,
3827 iregNo(am->ARM64am.RI9.reg), wD);
3828 *p++ = instr;
3829 return p;
3830 }
3831 if (am->tag == ARM64am_RI12) {
3832 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3833 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3834 */
3835 UInt uimm12 = am->ARM64am.RI12.uimm12;
3836 UInt scale = am->ARM64am.RI12.szB;
3837 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3838 UInt xN = iregNo(am->ARM64am.RI12.reg);
3839 vassert(xN <= 30);
3840 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3841 uimm12, xN, wD);
3842 *p++ = instr;
3843 return p;
3844 }
3845 if (am->tag == ARM64am_RR) {
3846       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3847          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3848 */
3849 UInt xN = iregNo(am->ARM64am.RR.base);
3850 UInt xM = iregNo(am->ARM64am.RR.index);
3851 vassert(xN <= 30);
3852 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3853 xM, X011010, xN, wD);
3854 *p++ = instr;
3855 return p;
3856 }
3857 vpanic("do_load_or_store8");
3858 vassert(0);
3859 }
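
/* Worked example (illustrative, not from the original source): for the
   RI9 case, an 8-bit widening load of w3 from [x5, #-1] is encoded as
   X_2_6_3_9_2_5_5(X00, X111000, X010, (-1) & 0x1FF, X00, 5, 3)
   == 0x385FF0A3, i.e. LDURB w3, [x5, #-1]. */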
3860
3861
3862 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3863 rD, using the given amode for the address. */
3864 static UInt* do_load_or_store16 ( UInt* p,
3865 Bool isLoad, UInt wD, ARM64AMode* am )
3866 {
3867 vassert(wD <= 30);
3868 if (am->tag == ARM64am_RI9) {
3869 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3870 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3871 */
3872 Int simm9 = am->ARM64am.RI9.simm9;
3873 vassert(-256 <= simm9 && simm9 <= 255);
3874 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3875 simm9 & 0x1FF, X00,
3876 iregNo(am->ARM64am.RI9.reg), wD);
3877 *p++ = instr;
3878 return p;
3879 }
3880 if (am->tag == ARM64am_RI12) {
3881 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3882 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3883 */
3884 UInt uimm12 = am->ARM64am.RI12.uimm12;
3885 UInt scale = am->ARM64am.RI12.szB;
3886 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3887 UInt xN = iregNo(am->ARM64am.RI12.reg);
3888 vassert(xN <= 30);
3889 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3890 uimm12, xN, wD);
3891 *p++ = instr;
3892 return p;
3893 }
3894 if (am->tag == ARM64am_RR) {
3895       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3896          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3897 */
3898 UInt xN = iregNo(am->ARM64am.RR.base);
3899 UInt xM = iregNo(am->ARM64am.RR.index);
3900 vassert(xN <= 30);
3901 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3902 xM, X011010, xN, wD);
3903 *p++ = instr;
3904 return p;
3905 }
3906 vpanic("do_load_or_store16");
3907 vassert(0);
3908 }
3909
3910
3911 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3912 rD, using the given amode for the address. */
3913 static UInt* do_load_or_store32 ( UInt* p,
3914 Bool isLoad, UInt wD, ARM64AMode* am )
3915 {
3916 vassert(wD <= 30);
3917 if (am->tag == ARM64am_RI9) {
3918 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3919 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3920 */
3921 Int simm9 = am->ARM64am.RI9.simm9;
3922 vassert(-256 <= simm9 && simm9 <= 255);
3923 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3924 simm9 & 0x1FF, X00,
3925 iregNo(am->ARM64am.RI9.reg), wD);
3926 *p++ = instr;
3927 return p;
3928 }
3929 if (am->tag == ARM64am_RI12) {
3930 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3931 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3932 */
3933 UInt uimm12 = am->ARM64am.RI12.uimm12;
3934 UInt scale = am->ARM64am.RI12.szB;
3935 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3936 UInt xN = iregNo(am->ARM64am.RI12.reg);
3937 vassert(xN <= 30);
3938 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3939 uimm12, xN, wD);
3940 *p++ = instr;
3941 return p;
3942 }
3943 if (am->tag == ARM64am_RR) {
3944 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3945 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3946 */
3947 UInt xN = iregNo(am->ARM64am.RR.base);
3948 UInt xM = iregNo(am->ARM64am.RR.index);
3949 vassert(xN <= 30);
3950 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3951 xM, X011010, xN, wD);
3952 *p++ = instr;
3953 return p;
3954 }
3955 vpanic("do_load_or_store32");
3956 vassert(0);
3957 }
3958
3959
3960 /* Generate a 64 bit load or store to/from xD, using the given amode
3961 for the address. */
3962 static UInt* do_load_or_store64 ( UInt* p,
3963 Bool isLoad, UInt xD, ARM64AMode* am )
3964 {
3965 /* In all these cases, Rn can't be 31 since that means SP. */
3966 vassert(xD <= 30);
3967 if (am->tag == ARM64am_RI9) {
3968 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3969 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3970 */
3971 Int simm9 = am->ARM64am.RI9.simm9;
3972 vassert(-256 <= simm9 && simm9 <= 255);
3973 UInt xN = iregNo(am->ARM64am.RI9.reg);
3974 vassert(xN <= 30);
3975 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3976 simm9 & 0x1FF, X00, xN, xD);
3977 *p++ = instr;
3978 return p;
3979 }
3980 if (am->tag == ARM64am_RI12) {
3981 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3982 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3983 */
3984 UInt uimm12 = am->ARM64am.RI12.uimm12;
3985 UInt scale = am->ARM64am.RI12.szB;
3986 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3987 UInt xN = iregNo(am->ARM64am.RI12.reg);
3988 vassert(xN <= 30);
3989 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3990 uimm12, xN, xD);
3991 *p++ = instr;
3992 return p;
3993 }
3994 if (am->tag == ARM64am_RR) {
3995 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3996 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3997 */
3998 UInt xN = iregNo(am->ARM64am.RR.base);
3999 UInt xM = iregNo(am->ARM64am.RR.index);
4000 vassert(xN <= 30);
4001 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
4002 xM, X011010, xN, xD);
4003 *p++ = instr;
4004 return p;
4005 }
4006 vpanic("do_load_or_store64");
4007 vassert(0);
4008 }
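
/* Note (illustrative): besides plain 64-bit loads and stores, the
   helper above is also what the emitter uses to write the updated
   guest PC into the amPC slot in the XDirect/XIndir/XAssisted cases
   below. */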
4009
4010
4011 /* Emit an instruction into buf and return the number of bytes used.
4012 Note that buf is not the insn's final place, and therefore it is
4013 imperative to emit position-independent code. If the emitted
4014 instruction was a profiler inc, set *is_profInc to True, else
4015 leave it unchanged. */
4016
4017 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
4018 UChar* buf, Int nbuf, ARM64Instr* i,
4019 Bool mode64,
4020 void* disp_cp_chain_me_to_slowEP,
4021 void* disp_cp_chain_me_to_fastEP,
4022 void* disp_cp_xindir,
4023 void* disp_cp_xassisted )
4024 {
4025 UInt* p = (UInt*)buf;
4026 vassert(nbuf >= 32);
4027 vassert(mode64 == True);
4028 vassert(0 == (((HWord)buf) & 3));
4029
4030 switch (i->tag) {
4031 case ARM64in_Arith: {
4032 UInt rD = iregNo(i->ARM64in.Arith.dst);
4033 UInt rN = iregNo(i->ARM64in.Arith.argL);
4034 ARM64RIA* argR = i->ARM64in.Arith.argR;
4035 switch (argR->tag) {
4036 case ARM64riA_I12:
4037 *p++ = X_2_6_2_12_5_5(
4038 i->ARM64in.Arith.isAdd ? X10 : X11,
4039 X010001,
4040 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
4041 argR->ARM64riA.I12.imm12, rN, rD
4042 );
4043 break;
4044 case ARM64riA_R: {
4045 UInt rM = iregNo(i->ARM64in.Arith.argR->ARM64riA.R.reg);
4046 *p++ = X_3_8_5_6_5_5(
4047 i->ARM64in.Arith.isAdd ? X100 : X110,
4048 X01011000, rM, X000000, rN, rD
4049 );
4050 break;
4051 }
4052 default:
4053 goto bad;
4054 }
4055 goto done;
4056 }
4057 case ARM64in_Cmp: {
4058 UInt rD = 31; /* XZR, we are going to dump the result */
4059 UInt rN = iregNo(i->ARM64in.Cmp.argL);
4060 ARM64RIA* argR = i->ARM64in.Cmp.argR;
4061 Bool is64 = i->ARM64in.Cmp.is64;
4062 switch (argR->tag) {
4063 case ARM64riA_I12:
4064 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
4065 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
4066 *p++ = X_2_6_2_12_5_5(
4067 is64 ? X11 : X01, X110001,
4068 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
4069 argR->ARM64riA.I12.imm12, rN, rD);
4070 break;
4071 case ARM64riA_R: {
4072 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
4073 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
4074 UInt rM = iregNo(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
4075 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
4076 X01011000, rM, X000000, rN, rD);
4077 break;
4078 }
4079 default:
4080 goto bad;
4081 }
4082 goto done;
4083 }
4084 case ARM64in_Logic: {
4085 UInt rD = iregNo(i->ARM64in.Logic.dst);
4086 UInt rN = iregNo(i->ARM64in.Logic.argL);
4087 ARM64RIL* argR = i->ARM64in.Logic.argR;
4088 UInt opc = 0; /* invalid */
4089 vassert(rD < 31);
4090 vassert(rN < 31);
4091 switch (i->ARM64in.Logic.op) {
4092 case ARM64lo_OR: opc = X101; break;
4093 case ARM64lo_AND: opc = X100; break;
4094 case ARM64lo_XOR: opc = X110; break;
4095 default: break;
4096 }
4097 vassert(opc != 0);
4098 switch (argR->tag) {
4099 case ARM64riL_I13: {
4100 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
4101 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
4102 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
4103 *p++ = X_3_6_1_6_6_5_5(
4104 opc, X100100, argR->ARM64riL.I13.bitN,
4105 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
4106 rN, rD
4107 );
4108 break;
4109 }
4110 case ARM64riL_R: {
4111 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
4112 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
4113 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
4114 UInt rM = iregNo(argR->ARM64riL.R.reg);
4115 vassert(rM < 31);
4116 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
4117 break;
4118 }
4119 default:
4120 goto bad;
4121 }
4122 goto done;
4123 }
4124 case ARM64in_Test: {
4125 UInt rD = 31; /* XZR, we are going to dump the result */
4126 UInt rN = iregNo(i->ARM64in.Test.argL);
4127 ARM64RIL* argR = i->ARM64in.Test.argR;
4128 switch (argR->tag) {
4129 case ARM64riL_I13: {
4130 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
4131 *p++ = X_3_6_1_6_6_5_5(
4132 X111, X100100, argR->ARM64riL.I13.bitN,
4133 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
4134 rN, rD
4135 );
4136 break;
4137 }
4138 default:
4139 goto bad;
4140 }
4141 goto done;
4142 }
4143 case ARM64in_Shift: {
4144 UInt rD = iregNo(i->ARM64in.Shift.dst);
4145 UInt rN = iregNo(i->ARM64in.Shift.argL);
4146 ARM64RI6* argR = i->ARM64in.Shift.argR;
4147 vassert(rD < 31);
4148 vassert(rN < 31);
4149 switch (argR->tag) {
4150 case ARM64ri6_I6: {
4151             /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
4152 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
4153 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
4154 UInt sh = argR->ARM64ri6.I6.imm6;
4155 vassert(sh > 0 && sh < 64);
4156 switch (i->ARM64in.Shift.op) {
4157 case ARM64sh_SHL:
4158 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
4159 1, 64-sh, 63-sh, rN, rD);
4160 break;
4161 case ARM64sh_SHR:
4162 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
4163 break;
4164 case ARM64sh_SAR:
4165 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
4166 break;
4167 default:
4168 vassert(0);
4169 }
4170 break;
4171 }
4172 case ARM64ri6_R: {
4173 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
4174 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
4175 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
4176 UInt rM = iregNo(argR->ARM64ri6.R.reg);
4177 vassert(rM < 31);
4178 UInt subOpc = 0;
4179 switch (i->ARM64in.Shift.op) {
4180 case ARM64sh_SHL: subOpc = X001000; break;
4181 case ARM64sh_SHR: subOpc = X001001; break;
4182 case ARM64sh_SAR: subOpc = X001010; break;
4183 default: vassert(0);
4184 }
4185 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
4186 break;
4187 }
4188 default:
4189 vassert(0);
4190 }
4191 goto done;
4192 }
4193 case ARM64in_Unary: {
4194 UInt rDst = iregNo(i->ARM64in.Unary.dst);
4195 UInt rSrc = iregNo(i->ARM64in.Unary.src);
4196 switch (i->ARM64in.Unary.op) {
4197 case ARM64un_CLZ:
4198 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
4199 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
4200 *p++ = X_3_8_5_6_5_5(X110,
4201 X11010110, X00000, X000100, rSrc, rDst);
4202 goto done;
4203 case ARM64un_NEG:
4204 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
4205 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
4206 *p++ = X_3_8_5_6_5_5(X110,
4207 X01011000, rSrc, X000000, X11111, rDst);
4208 goto done;
4209 case ARM64un_NOT: {
4210 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
4211 *p++ = X_3_8_5_6_5_5(X101,
4212 X01010001, rSrc, X000000, X11111, rDst);
4213 goto done;
4214 }
4215 default:
4216 break;
4217 }
4218 goto bad;
4219 }
4220 case ARM64in_MovI: {
4221 /* We generate the "preferred form", ORR Xd, XZR, Xm
4222 101 01010 00 0 m 000000 11111 d
4223 */
4224 UInt instr = 0xAA0003E0;
4225 UInt d = iregNo(i->ARM64in.MovI.dst);
4226 UInt m = iregNo(i->ARM64in.MovI.src);
4227 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
4228 goto done;
4229 }
4230 case ARM64in_Imm64: {
4231 p = imm64_to_iregNo( p, iregNo(i->ARM64in.Imm64.dst),
4232 i->ARM64in.Imm64.imm64 );
4233 goto done;
4234 }
4235 case ARM64in_LdSt64: {
4236 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
4237 iregNo(i->ARM64in.LdSt64.rD),
4238 i->ARM64in.LdSt64.amode );
4239 goto done;
4240 }
4241 case ARM64in_LdSt32: {
4242 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
4243 iregNo(i->ARM64in.LdSt32.rD),
4244 i->ARM64in.LdSt32.amode );
4245 goto done;
4246 }
4247 case ARM64in_LdSt16: {
4248 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
4249 iregNo(i->ARM64in.LdSt16.rD),
4250 i->ARM64in.LdSt16.amode );
4251 goto done;
4252 }
4253 case ARM64in_LdSt8: {
4254 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
4255 iregNo(i->ARM64in.LdSt8.rD),
4256 i->ARM64in.LdSt8.amode );
4257 goto done;
4258 }
4259 //ZZ case ARMin_LdSt32:
4260 //ZZ case ARMin_LdSt8U: {
4261 //ZZ UInt bL, bB;
4262 //ZZ HReg rD;
4263 //ZZ ARMAMode1* am;
4264 //ZZ ARMCondCode cc;
4265 //ZZ if (i->tag == ARMin_LdSt32) {
4266 //ZZ bB = 0;
4267 //ZZ bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
4268 //ZZ am = i->ARMin.LdSt32.amode;
4269 //ZZ rD = i->ARMin.LdSt32.rD;
4270 //ZZ cc = i->ARMin.LdSt32.cc;
4271 //ZZ } else {
4272 //ZZ bB = 1;
4273 //ZZ bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
4274 //ZZ am = i->ARMin.LdSt8U.amode;
4275 //ZZ rD = i->ARMin.LdSt8U.rD;
4276 //ZZ cc = i->ARMin.LdSt8U.cc;
4277 //ZZ }
4278 //ZZ vassert(cc != ARMcc_NV);
4279 //ZZ if (am->tag == ARMam1_RI) {
4280 //ZZ Int simm12;
4281 //ZZ UInt instr, bP;
4282 //ZZ if (am->ARMam1.RI.simm13 < 0) {
4283 //ZZ bP = 0;
4284 //ZZ simm12 = -am->ARMam1.RI.simm13;
4285 //ZZ } else {
4286 //ZZ bP = 1;
4287 //ZZ simm12 = am->ARMam1.RI.simm13;
4288 //ZZ }
4289 //ZZ vassert(simm12 >= 0 && simm12 <= 4095);
4290 //ZZ instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
4291 //ZZ iregNo(am->ARMam1.RI.reg),
4292 //ZZ iregNo(rD));
4293 //ZZ instr |= simm12;
4294 //ZZ *p++ = instr;
4295 //ZZ goto done;
4296 //ZZ } else {
4297 //ZZ // RR case
4298 //ZZ goto bad;
4299 //ZZ }
4300 //ZZ }
4301 //ZZ case ARMin_LdSt16: {
4302 //ZZ HReg rD = i->ARMin.LdSt16.rD;
4303 //ZZ UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
4304 //ZZ UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
4305 //ZZ ARMAMode2* am = i->ARMin.LdSt16.amode;
4306 //ZZ ARMCondCode cc = i->ARMin.LdSt16.cc;
4307 //ZZ vassert(cc != ARMcc_NV);
4308 //ZZ if (am->tag == ARMam2_RI) {
4309 //ZZ HReg rN = am->ARMam2.RI.reg;
4310 //ZZ Int simm8;
4311 //ZZ UInt bP, imm8hi, imm8lo, instr;
4312 //ZZ if (am->ARMam2.RI.simm9 < 0) {
4313 //ZZ bP = 0;
4314 //ZZ simm8 = -am->ARMam2.RI.simm9;
4315 //ZZ } else {
4316 //ZZ bP = 1;
4317 //ZZ simm8 = am->ARMam2.RI.simm9;
4318 //ZZ }
4319 //ZZ vassert(simm8 >= 0 && simm8 <= 255);
4320 //ZZ imm8hi = (simm8 >> 4) & 0xF;
4321 //ZZ imm8lo = simm8 & 0xF;
4322 //ZZ vassert(!(bL == 0 && bS == 1)); // "! signed store"
4323 //ZZ /**/ if (bL == 0 && bS == 0) {
4324 //ZZ // strh
4325 //ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
4326 //ZZ iregNo(rD), imm8hi, X1011, imm8lo);
4327 //ZZ *p++ = instr;
4328 //ZZ goto done;
4329 //ZZ }
4330 //ZZ else if (bL == 1 && bS == 0) {
4331 //ZZ // ldrh
4332 //ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
4333 //ZZ iregNo(rD), imm8hi, X1011, imm8lo);
4334 //ZZ *p++ = instr;
4335 //ZZ goto done;
4336 //ZZ }
4337 //ZZ else if (bL == 1 && bS == 1) {
4338 //ZZ // ldrsh
4339 //ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
4340 //ZZ iregNo(rD), imm8hi, X1111, imm8lo);
4341 //ZZ *p++ = instr;
4342 //ZZ goto done;
4343 //ZZ }
4344 //ZZ else vassert(0); // ill-constructed insn
4345 //ZZ } else {
4346 //ZZ // RR case
4347 //ZZ goto bad;
4348 //ZZ }
4349 //ZZ }
4350 //ZZ case ARMin_Ld8S: {
4351 //ZZ HReg rD = i->ARMin.Ld8S.rD;
4352 //ZZ ARMAMode2* am = i->ARMin.Ld8S.amode;
4353 //ZZ ARMCondCode cc = i->ARMin.Ld8S.cc;
4354 //ZZ vassert(cc != ARMcc_NV);
4355 //ZZ if (am->tag == ARMam2_RI) {
4356 //ZZ HReg rN = am->ARMam2.RI.reg;
4357 //ZZ Int simm8;
4358 //ZZ UInt bP, imm8hi, imm8lo, instr;
4359 //ZZ if (am->ARMam2.RI.simm9 < 0) {
4360 //ZZ bP = 0;
4361 //ZZ simm8 = -am->ARMam2.RI.simm9;
4362 //ZZ } else {
4363 //ZZ bP = 1;
4364 //ZZ simm8 = am->ARMam2.RI.simm9;
4365 //ZZ }
4366 //ZZ vassert(simm8 >= 0 && simm8 <= 255);
4367 //ZZ imm8hi = (simm8 >> 4) & 0xF;
4368 //ZZ imm8lo = simm8 & 0xF;
4369 //ZZ // ldrsb
4370 //ZZ instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
4371 //ZZ iregNo(rD), imm8hi, X1101, imm8lo);
4372 //ZZ *p++ = instr;
4373 //ZZ goto done;
4374 //ZZ } else {
4375 //ZZ // RR case
4376 //ZZ goto bad;
4377 //ZZ }
4378 //ZZ }
4379
4380 case ARM64in_XDirect: {
4381 /* NB: what goes on here has to be very closely coordinated
4382 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
4383 /* We're generating chain-me requests here, so we need to be
4384 sure this is actually allowed -- no-redir translations
4385 can't use chain-me's. Hence: */
4386 vassert(disp_cp_chain_me_to_slowEP != NULL);
4387 vassert(disp_cp_chain_me_to_fastEP != NULL);
4388
4389 /* Use ptmp for backpatching conditional jumps. */
4390 UInt* ptmp = NULL;
4391
4392 /* First off, if this is conditional, create a conditional
4393 jump over the rest of it. Or at least, leave a space for
4394 it that we will shortly fill in. */
4395 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
4396 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
4397 ptmp = p;
4398 *p++ = 0;
4399 }
4400
4401 /* Update the guest PC. */
4402 /* imm64 x9, dstGA */
4403 /* str x9, amPC */
4404 p = imm64_to_iregNo(p, /*x*/9, i->ARM64in.XDirect.dstGA);
4405 p = do_load_or_store64(p, False/*!isLoad*/,
4406 /*x*/9, i->ARM64in.XDirect.amPC);
4407
4408 /* --- FIRST PATCHABLE BYTE follows --- */
4409 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
4410 calling to) backs up the return address, so as to find the
4411 address of the first patchable byte. So: don't change the
4412 number of instructions (5) below. */
4413 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
4414          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
4415 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
4416 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
4417 /* blr x9 */
4418 void* disp_cp_chain_me
4419 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
4420 : disp_cp_chain_me_to_slowEP;
4421 p = imm64_to_iregNo_EXACTLY4(p, /*x*/9,
4422 Ptr_to_ULong(disp_cp_chain_me));
4423 *p++ = 0xD63F0120;
4424 /* --- END of PATCHABLE BYTES --- */
4425
4426 /* Fix up the conditional jump, if there was one. */
4427 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
4428 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
4429 vassert(delta > 0 && delta < 40);
4430 vassert((delta & 3) == 0);
4431 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
4432 vassert(notCond <= 13); /* Neither AL nor NV */
4433 vassert(ptmp != NULL);
4434 delta = delta >> 2;
4435 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
4436 }
4437 goto done;
4438 }
4439
4440 case ARM64in_XIndir: {
4441 // XIndir is more or less the same as XAssisted, except
4442 // we don't have a trc value to hand back, so there's no
4443       // write to x21
4444 /* Use ptmp for backpatching conditional jumps. */
4445 //UInt* ptmp = NULL;
4446
4447 /* First off, if this is conditional, create a conditional
4448 jump over the rest of it. Or at least, leave a space for
4449 it that we will shortly fill in. */
4450 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
4451 vassert(0); //ATC
4452 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
4453 //ZZ ptmp = p;
4454 //ZZ *p++ = 0;
4455 }
4456
4457 /* Update the guest PC. */
4458 /* str r-dstGA, amPC */
4459 p = do_load_or_store64(p, False/*!isLoad*/,
4460 iregNo(i->ARM64in.XIndir.dstGA),
4461 i->ARM64in.XIndir.amPC);
4462
4463 /* imm64 x9, VG_(disp_cp_xindir) */
4464 /* br x9 */
4465 p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xindir));
4466 *p++ = 0xD61F0120; /* br x9 */
4467
4468 /* Fix up the conditional jump, if there was one. */
4469 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
4470 vassert(0); //ATC
4471 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
4472 //ZZ vassert(delta > 0 && delta < 40);
4473 //ZZ vassert((delta & 3) == 0);
4474 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
4475 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
4476 //ZZ delta = (delta >> 2) - 2;
4477 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
4478 }
4479 goto done;
4480 }
4481
4482 case ARM64in_XAssisted: {
4483 /* Use ptmp for backpatching conditional jumps. */
4484 UInt* ptmp = NULL;
4485
4486 /* First off, if this is conditional, create a conditional
4487 jump over the rest of it. Or at least, leave a space for
4488 it that we will shortly fill in. I think this can only
4489 ever happen when VEX is driven by the switchbacker. */
4490 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
4491          vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
4492 ptmp = p;
4493 *p++ = 0;
4494 }
4495
4496 /* Update the guest PC. */
4497 /* str r-dstGA, amPC */
4498 p = do_load_or_store64(p, False/*!isLoad*/,
4499 iregNo(i->ARM64in.XAssisted.dstGA),
4500 i->ARM64in.XAssisted.amPC);
4501
4502 /* movw r21, $magic_number */
4503 UInt trcval = 0;
4504 switch (i->ARM64in.XAssisted.jk) {
4505 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
4506 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
4507 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
4508 //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
4509 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
4510 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
4511 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
4512 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
4513 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
4514 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
4515 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
4516 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
4517 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
4518 /* We don't expect to see the following being assisted. */
4519 //case Ijk_Ret:
4520 //case Ijk_Call:
4521 /* fallthrough */
4522 default:
4523 ppIRJumpKind(i->ARM64in.XAssisted.jk);
4524 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
4525 "unexpected jump kind");
4526 }
4527 vassert(trcval != 0);
4528 p = imm64_to_iregNo(p, /*x*/21, (ULong)trcval);
4529
4530 /* imm64 x9, VG_(disp_cp_xassisted) */
4531 /* br x9 */
4532 p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xassisted));
4533 *p++ = 0xD61F0120; /* br x9 */
4534
4535 /* Fix up the conditional jump, if there was one. */
4536 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
4537 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
4538 vassert(delta > 0 && delta < 40);
4539 vassert((delta & 3) == 0);
4540          UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
4541 vassert(notCond <= 13); /* Neither AL nor NV */
4542 vassert(ptmp != NULL);
4543 delta = delta >> 2;
4544 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
4545 }
4546 goto done;
4547 }
4548
4549 case ARM64in_CSel: {
4550 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
4551 UInt dd = iregNo(i->ARM64in.CSel.dst);
4552 UInt nn = iregNo(i->ARM64in.CSel.argL);
4553 UInt mm = iregNo(i->ARM64in.CSel.argR);
4554 UInt cond = (UInt)i->ARM64in.CSel.cond;
4555 vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
4556 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
4557 goto done;
4558 }
4559
4560 case ARM64in_Call: {
4561 /* We'll use x9 as a scratch register to put the target
4562 address in. */
4563 if (i->ARM64in.Call.cond != ARM64cc_AL
4564 && i->ARM64in.Call.rloc.pri != RLPri_None) {
4565 /* The call might not happen (it isn't unconditional) and
4566 it returns a result. In this case we will need to
4567 generate a control flow diamond to put 0x555..555 in
4568 the return register(s) in the case where the call
4569 doesn't happen. If this ever becomes necessary, maybe
4570 copy code from the 32-bit ARM equivalent. Until that
4571 day, just give up. */
4572 goto bad;
4573 }
4574
4575 UInt* ptmp = NULL;
4576 if (i->ARM64in.Call.cond != ARM64cc_AL) {
4577 /* Create a hole to put a conditional branch in. We'll
4578 patch it once we know the branch length. */
4579 ptmp = p;
4580 *p++ = 0;
4581 }
4582
4583 // x9 = &target
4584 p = imm64_to_iregNo( (UInt*)p,
4585 /*x*/9, (ULong)i->ARM64in.Call.target );
4586 // blr x9
4587 *p++ = 0xD63F0120;
4588
4589 // Patch the hole if necessary
4590 if (i->ARM64in.Call.cond != ARM64cc_AL) {
4591 ULong dist = (ULong)(p - ptmp);
4592 /* imm64_to_iregNo produces between 1 and 4 insns, and
4593 then there's the BLR itself. Hence: */
4594 vassert(dist >= 2 && dist <= 5);
4595 vassert(ptmp != NULL);
4596 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
4597 *ptmp = X_8_19_1_4(X01010100, dist, 0,
4598 1 ^ (UInt)i->ARM64in.Call.cond);
4599 } else {
4600 vassert(ptmp == NULL);
4601 }
4602
4603 goto done;
4604 }
4605
4606 case ARM64in_AddToSP: {
4607 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
4608 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
4609 */
4610 Int simm12 = i->ARM64in.AddToSP.simm;
4611 vassert(-4096 < simm12 && simm12 < 4096);
4612 vassert(0 == (simm12 & 0xF));
4613 if (simm12 >= 0) {
4614 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
4615 } else {
4616 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
4617 }
4618 goto done;
4619 }
4620
4621 case ARM64in_FromSP: {
4622 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
4623 UInt dd = iregNo(i->ARM64in.FromSP.dst);
4624 vassert(dd < 31);
4625 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
4626 goto done;
4627 }
4628
4629 case ARM64in_Mul: {
4630 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
4631 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
4632 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
4633 */
4634 UInt dd = iregNo(i->ARM64in.Mul.dst);
4635 UInt nn = iregNo(i->ARM64in.Mul.argL);
4636 UInt mm = iregNo(i->ARM64in.Mul.argR);
4637 vassert(dd < 31 && nn < 31 && mm < 31);
4638 switch (i->ARM64in.Mul.op) {
4639 case ARM64mul_ZX:
4640 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
4641 goto done;
4642 case ARM64mul_SX:
4643 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
4644 goto done;
4645 case ARM64mul_PLAIN:
4646 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
4647 goto done;
4648 default:
4649 vassert(0);
4650 }
4651 goto bad;
4652 }
4653 case ARM64in_LdrEX: {
4654 /* 085F7C82 ldxrb w2, [x4]
4655 485F7C82 ldxrh w2, [x4]
4656 885F7C82 ldxr w2, [x4]
4657 C85F7C82 ldxr x2, [x4]
4658 */
4659 switch (i->ARM64in.LdrEX.szB) {
4660 case 1: *p++ = 0x085F7C82; goto done;
4661 case 2: *p++ = 0x485F7C82; goto done;
4662 case 4: *p++ = 0x885F7C82; goto done;
4663 case 8: *p++ = 0xC85F7C82; goto done;
4664 default: break;
4665 }
4666 goto bad;
4667 }
4668 case ARM64in_StrEX: {
4669 /* 08007C82 stxrb w0, w2, [x4]
4670 48007C82 stxrh w0, w2, [x4]
4671 88007C82 stxr w0, w2, [x4]
4672 C8007C82 stxr w0, x2, [x4]
4673 */
4674 switch (i->ARM64in.StrEX.szB) {
4675 case 1: *p++ = 0x08007C82; goto done;
4676 case 2: *p++ = 0x48007C82; goto done;
4677 case 4: *p++ = 0x88007C82; goto done;
4678 case 8: *p++ = 0xC8007C82; goto done;
4679 default: break;
4680 }
4681 goto bad;
4682 }
4683 case ARM64in_MFence: {
4684 *p++ = 0xD5033F9F; /* DSB sy */
4685 *p++ = 0xD5033FBF; /* DMB sy */
4686 *p++ = 0xD5033FDF; /* ISB */
4687 goto done;
4688 }
4689 //case ARM64in_CLREX: {
4690 // //ATC, but believed to be correct
4691 // goto bad;
4692 // *p++ = 0xD5033F5F; /* clrex */
4693 // goto done;
4694 //}
4695 case ARM64in_VLdStS: {
4696 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
4697 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
4698 */
4699 UInt sD = dregNo(i->ARM64in.VLdStS.sD);
4700 UInt rN = iregNo(i->ARM64in.VLdStS.rN);
4701 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
4702 Bool isLD = i->ARM64in.VLdStS.isLoad;
4703 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
4704 uimm12 >>= 2;
4705 vassert(uimm12 < (1<<12));
4706 vassert(sD < 32);
4707 vassert(rN < 31);
4708 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
4709 uimm12, rN, sD);
4710 goto done;
4711 }
4712 case ARM64in_VLdStD: {
4713 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
4714 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
4715 */
4716 UInt dD = dregNo(i->ARM64in.VLdStD.dD);
4717 UInt rN = iregNo(i->ARM64in.VLdStD.rN);
4718 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
4719 Bool isLD = i->ARM64in.VLdStD.isLoad;
4720 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
4721 uimm12 >>= 3;
4722 vassert(uimm12 < (1<<12));
4723 vassert(dD < 32);
4724 vassert(rN < 31);
4725 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
4726 uimm12, rN, dD);
4727 goto done;
4728 }
4729 case ARM64in_VLdStQ: {
4730 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
4731 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
4732 */
4733 UInt rQ = qregNo(i->ARM64in.VLdStQ.rQ);
4734 UInt rN = iregNo(i->ARM64in.VLdStQ.rN);
4735 vassert(rQ < 32);
4736 vassert(rN < 31);
4737 if (i->ARM64in.VLdStQ.isLoad) {
4738 *p++ = 0x4C407C00 | (rN << 5) | rQ;
4739 } else {
4740 *p++ = 0x4C007C00 | (rN << 5) | rQ;
4741 }
4742 goto done;
4743 }
4744 case ARM64in_VCvtI2F: {
4745 /* 31 28 23 21 20 18 15 9 4
4746 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
4747 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
4748 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
4749 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
4750 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
4751 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
4752 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
4753 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
4754 */
4755 UInt rN = iregNo(i->ARM64in.VCvtI2F.rS);
4756 UInt rD = dregNo(i->ARM64in.VCvtI2F.rD);
4757 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
4758 /* Just handle cases as they show up. */
4759 switch (how) {
4760 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
4761 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
4762 break;
4763 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
4764 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
4765 break;
4766 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
4767 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
4768 break;
4769 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
4770 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
4771 break;
4772 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
4773 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
4774 break;
4775 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
4776 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
4777 break;
4778 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
4779 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
4780 break;
4781 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
4782 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
4783 break;
4784 default:
4785 goto bad; //ATC
4786 }
4787 goto done;
4788 }
4789 case ARM64in_VCvtF2I: {
4790 /* 30 23 20 18 15 9 4
4791 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4792 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4793 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4794 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4795 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4796
4797 Rd is Xd when sf==1, Wd when sf==0
4798 Fn is Dn when x==1, Sn when x==0
4799 20:19 carry the rounding mode, using the same encoding as FPCR
4800 */
4801 UInt rD = iregNo(i->ARM64in.VCvtF2I.rD);
4802 UInt rN = dregNo(i->ARM64in.VCvtF2I.rS);
4803 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
4804 UChar armRM = i->ARM64in.VCvtF2I.armRM;
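/* armRM is the 2-bit FPCR-style rounding mode; (armRM << 3) below places
   it in insn bits 20:19, selecting the FCVTN/FCVTP/FCVTM/FCVTZ variant
   per the table above. */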
4805 /* Just handle cases as they show up. */
4806 switch (how) {
4807 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
4808 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
4809 X000000, rN, rD);
4810 break;
4811 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
4812 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
4813 X000000, rN, rD);
4814 break;
4815 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
4816 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
4817 X000000, rN, rD);
4818 break;
4819 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
4820 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
4821 X000000, rN, rD);
4822 break;
4823 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
4824 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
4825 X000000, rN, rD);
4826 break;
4827 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
4828 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
4829 X000000, rN, rD);
4830 break;
4831 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
4832 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
4833 X000000, rN, rD);
4834 break;
4835 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
4836 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
4837 X000000, rN, rD);
4838 break;
4839 default:
4840 goto bad; //ATC
4841 }
4842 goto done;
4843 }
4844 case ARM64in_VCvtSD: {
4845 /* 31 23 21 16 14 9 4
4846 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4847 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4848 Rounding, when dst is smaller than src, is per the FPCR.
4849 */
4850 UInt dd = dregNo(i->ARM64in.VCvtSD.dst);
4851 UInt nn = dregNo(i->ARM64in.VCvtSD.src);
4852 if (i->ARM64in.VCvtSD.sToD) {
4853 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
4854 } else {
4855 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
4856 }
4857 goto done;
4858 }
4859 case ARM64in_VUnaryD: {
4860 /* 31 23 21 16 14 9 4
4861 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4862 ------------------- 0,1 --------- FABS ------
4863 ------------------- 1,0 --------- FNEG ------
4864                ------------------- 1,1 --------- FSQRT -----
4865 */
4866 UInt dD = dregNo(i->ARM64in.VUnaryD.dst);
4867 UInt dN = dregNo(i->ARM64in.VUnaryD.src);
4868 UInt b16 = 2; /* impossible */
4869 UInt b15 = 2; /* impossible */
4870 switch (i->ARM64in.VUnaryD.op) {
4871 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4872 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4873 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4874 default: break;
4875 }
4876 if (b16 < 2 && b15 < 2) {
4877 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4878 (b15 << 5) | X10000, dN, dD);
4879 goto done;
4880 }
4881 /*
4882          000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dn (round per FPCR)
4883 */
4884 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4885 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4886 goto done;
4887 }
4888 goto bad;
4889 }
4890 case ARM64in_VUnaryS: {
4891 /* 31 23 21 16 14 9 4
4892 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4893 ------------------- 0,1 --------- FABS ------
4894 ------------------- 1,0 --------- FNEG ------
4895                ------------------- 1,1 --------- FSQRT -----
4896 */
4897 UInt sD = dregNo(i->ARM64in.VUnaryS.dst);
4898 UInt sN = dregNo(i->ARM64in.VUnaryS.src);
4899 UInt b16 = 2; /* impossible */
4900 UInt b15 = 2; /* impossible */
4901 switch (i->ARM64in.VUnaryS.op) {
4902 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4903 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4904 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4905 default: break;
4906 }
4907 if (b16 < 2 && b15 < 2) {
4908 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4909 (b15 << 5) | X10000, sN, sD);
4910 goto done;
4911 }
4912 /*
4913          000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sn (round per FPCR)
4914 */
4915 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4916 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4917 goto done;
4918 }
4919 goto bad;
4920 }
4921 case ARM64in_VBinD: {
4922 /* 31 23 20 15 11 9 4
4923 ---------------- 0000 ------ FMUL --------
4924 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4925 ---------------- 0010 ------ FADD --------
4926 ---------------- 0011 ------ FSUB --------
4927 */
4928 UInt dD = dregNo(i->ARM64in.VBinD.dst);
4929 UInt dN = dregNo(i->ARM64in.VBinD.argL);
4930 UInt dM = dregNo(i->ARM64in.VBinD.argR);
4931 UInt b1512 = 16; /* impossible */
4932 switch (i->ARM64in.VBinD.op) {
4933 case ARM64fpb_DIV: b1512 = X0001; break;
4934 case ARM64fpb_MUL: b1512 = X0000; break;
4935 case ARM64fpb_SUB: b1512 = X0011; break;
4936 case ARM64fpb_ADD: b1512 = X0010; break;
4937 default: goto bad;
4938 }
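/* b1512 lands in insn bits 15:12 and selects FMUL/FDIV/FADD/FSUB as per
   the table above; bits 11:10 are always 10. */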
4939 vassert(b1512 < 16);
4940 *p++
4941 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4942 goto done;
4943 }
4944 case ARM64in_VBinS: {
4945 /* 31 23 20 15 11 9 4
4946 ---------------- 0000 ------ FMUL --------
4947            000 11110 001 m  0001 10 n d   FDIV Sd,Sn,Sm
4948 ---------------- 0010 ------ FADD --------
4949 ---------------- 0011 ------ FSUB --------
4950 */
4951 UInt sD = dregNo(i->ARM64in.VBinS.dst);
4952 UInt sN = dregNo(i->ARM64in.VBinS.argL);
4953 UInt sM = dregNo(i->ARM64in.VBinS.argR);
4954 UInt b1512 = 16; /* impossible */
4955 switch (i->ARM64in.VBinS.op) {
4956 case ARM64fpb_DIV: b1512 = X0001; break;
4957 case ARM64fpb_MUL: b1512 = X0000; break;
4958 case ARM64fpb_SUB: b1512 = X0011; break;
4959 case ARM64fpb_ADD: b1512 = X0010; break;
4960 default: goto bad;
4961 }
4962 vassert(b1512 < 16);
4963 *p++
4964 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4965 goto done;
4966 }
4967 case ARM64in_VCmpD: {
4968 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4969 UInt dN = dregNo(i->ARM64in.VCmpD.argL);
4970 UInt dM = dregNo(i->ARM64in.VCmpD.argR);
4971 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4972 goto done;
4973 }
4974 case ARM64in_VCmpS: {
4975 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4976 UInt sN = dregNo(i->ARM64in.VCmpS.argL);
4977 UInt sM = dregNo(i->ARM64in.VCmpS.argR);
4978 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4979 goto done;
4980 }
4981 case ARM64in_FPCR: {
4982 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4983 UInt iReg = iregNo(i->ARM64in.FPCR.iReg);
4984 if (toFPCR) {
4985 /* 0xD51B44 000 Rt MSR fpcr, rT */
4986 *p++ = 0xD51B4400 | (iReg & 0x1F);
4987 goto done;
4988 }
4989 goto bad; // FPCR -> iReg case currently ATC
4990 }
4991 case ARM64in_VBinV: {
4992 /* 31 23 20 15 9 4
4993 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4994 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4995 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4996 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4997
4998 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4999 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
5000 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
5001 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
5002
5003 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
5004 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
5005 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
5006
5007 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
5008 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
5009 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
5010 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
5011
5012 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
5013 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
5014 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
5015 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
5016
5017 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
5018 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
5019 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
5020
5021 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
5022 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
5023 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
5024
5025 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
5026 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
5027 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
5028
5029 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
5030 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
5031 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
5032
5033 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
5034 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
5035 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
5036
5037 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
5038 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
5039 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
5040 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
5041
5042 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
5043 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
5044 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
5045 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
5046
5047 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
5048 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
5049 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
5050 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
5051
5052 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
5053 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
5054
5055 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
5056 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
5057
5058 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
5059 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
5060
5061 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
5062
5063 */
5064 UInt vD = qregNo(i->ARM64in.VBinV.dst);
5065 ARM64VecBinOp op = i->ARM64in.VBinV.op;
5066 Bool isV128 = (op != ARM64vecb_UMULL8x8
5067 && op != ARM64vecb_UMULL16x4
5068 && op != ARM64vecb_UMULL32x2);
5069 UInt vN = isV128 ? qregNo(i->ARM64in.VBinV.argL)
5070 : dregNo(i->ARM64in.VBinV.argL);
5071 UInt vM = isV128 ? qregNo(i->ARM64in.VBinV.argR)
5072 : dregNo(i->ARM64in.VBinV.argR);
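/* The UMULL cases read 64-bit (D register) source operands and widen into
   a 128-bit result, hence the D-register numbering of vN/vM for them. */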
5073 switch (i->ARM64in.VBinV.op) {
5074 case ARM64vecb_ADD64x2:
5075 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
5076 break;
5077 case ARM64vecb_ADD32x4:
5078 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
5079 break;
5080 case ARM64vecb_ADD16x8:
5081 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
5082 break;
5083 case ARM64vecb_ADD8x16:
5084 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
5085 break;
5086 case ARM64vecb_SUB64x2:
5087 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
5088 break;
5089 case ARM64vecb_SUB32x4:
5090 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
5091 break;
5092 case ARM64vecb_SUB16x8:
5093 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
5094 break;
5095 case ARM64vecb_SUB8x16:
5096 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
5097 break;
5098 case ARM64vecb_MUL32x4:
5099 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
5100 break;
5101 case ARM64vecb_MUL16x8:
5102 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
5103 break;
5104 case ARM64vecb_MUL8x16:
5105 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
5106 break;
5107 case ARM64vecb_FADD64x2:
5108 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
5109 break;
5110 case ARM64vecb_FADD32x4:
5111 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
5112 break;
5113 case ARM64vecb_FSUB64x2:
5114 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
5115 break;
5116 case ARM64vecb_FSUB32x4:
5117 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
5118 break;
5119 case ARM64vecb_FMUL64x2:
5120 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
5121 break;
5122 case ARM64vecb_FMUL32x4:
5123 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
5124 break;
5125 case ARM64vecb_FDIV64x2:
5126 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
5127 break;
5128 case ARM64vecb_FDIV32x4:
5129 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
5130 break;
5131
5132 case ARM64vecb_UMAX32x4:
5133 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
5134 break;
5135 case ARM64vecb_UMAX16x8:
5136 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
5137 break;
5138 case ARM64vecb_UMAX8x16:
5139 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
5140 break;
5141
5142 case ARM64vecb_UMIN32x4:
5143 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
5144 break;
5145 case ARM64vecb_UMIN16x8:
5146 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
5147 break;
5148 case ARM64vecb_UMIN8x16:
5149 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
5150 break;
5151
5152 case ARM64vecb_UMULL32x2:
5153 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
5154 break;
5155 case ARM64vecb_UMULL16x4:
5156 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
5157 break;
5158 case ARM64vecb_UMULL8x8:
5159 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
5160 break;
5161
5162 case ARM64vecb_SMAX32x4:
5163 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
5164 break;
5165 case ARM64vecb_SMAX16x8:
5166 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
5167 break;
5168 case ARM64vecb_SMAX8x16:
5169 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
5170 break;
5171
5172 case ARM64vecb_SMIN32x4:
5173 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
5174 break;
5175 case ARM64vecb_SMIN16x8:
5176 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
5177 break;
5178 case ARM64vecb_SMIN8x16:
5179 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
5180 break;
5181
5182 case ARM64vecb_AND:
5183 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
5184 break;
5185 case ARM64vecb_ORR:
5186 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
5187 break;
5188 case ARM64vecb_XOR:
5189 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
5190 break;
5191
5192 case ARM64vecb_CMEQ64x2:
5193 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
5194 break;
5195 case ARM64vecb_CMEQ32x4:
5196 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
5197 break;
5198 case ARM64vecb_CMEQ16x8:
5199 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
5200 break;
5201 case ARM64vecb_CMEQ8x16:
5202 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
5203 break;
5204
5205 case ARM64vecb_CMHI64x2:
5206 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
5207 break;
5208 case ARM64vecb_CMHI32x4:
5209 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
5210 break;
5211 case ARM64vecb_CMHI16x8:
5212 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
5213 break;
5214 case ARM64vecb_CMHI8x16:
5215 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
5216 break;
5217
5218 case ARM64vecb_CMGT64x2:
5219 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
5220 break;
5221 case ARM64vecb_CMGT32x4:
5222 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
5223 break;
5224 case ARM64vecb_CMGT16x8:
5225 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
5226 break;
5227 case ARM64vecb_CMGT8x16:
5228 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
5229 break;
5230
5231 case ARM64vecb_FCMEQ64x2:
5232 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
5233 break;
5234 case ARM64vecb_FCMEQ32x4:
5235 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
5236 break;
5237
5238 case ARM64vecb_FCMGE64x2:
5239 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
5240 break;
5241 case ARM64vecb_FCMGE32x4:
5242 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
5243 break;
5244
5245 case ARM64vecb_FCMGT64x2:
5246 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
5247 break;
5248 case ARM64vecb_FCMGT32x4:
5249 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
5250 break;
5251
5252 case ARM64vecb_TBL1:
5253 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
5254 break;
5255
5256 default:
5257 goto bad;
5258 }
5259 goto done;
5260 }
5261 case ARM64in_VUnaryV: {
5262 /* 31 23 20 15 9 4
5263 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
5264 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
5265 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
5266 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
5267 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
5268 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
5269
5270 011 01110 00 1 10000 001110 n d UADDLV Hd, Vn.16b
5271 011 01110 01 1 10000 001110 n d UADDLV Sd, Vn.8h
5272 011 01110 10 1 10000 001110 n d UADDLV Dd, Vn.4s
5273
5274 010 01110 00 1 10000 001110 n d SADDLV Hd, Vn.16b
5275 010 01110 01 1 10000 001110 n d SADDLV Sd, Vn.8h
5276 010 01110 10 1 10000 001110 n d SADDLV Dd, Vn.4s
5277 */
5278 ARM64VecUnaryOp op = i->ARM64in.VUnaryV.op;
5279 UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
5280 Bool isV128 = !(op >= ARM64vecu_VMOVL8U && op <= ARM64vecu_VMOVL32S);
5281 UInt vN = isV128 ? qregNo(i->ARM64in.VUnaryV.arg)
5282 : dregNo(i->ARM64in.VUnaryV.arg);
5283 switch (i->ARM64in.VUnaryV.op) {
5284 case ARM64vecu_FABS64x2:
5285 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
5286 break;
5287 case ARM64vecu_FABS32x4:
5288 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
5289 break;
5290 case ARM64vecu_FNEG64x2:
5291 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
5292 break;
5293 case ARM64vecu_FNEG32x4:
5294 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
5295 break;
5296 case ARM64vecu_VMOVL8U:
5297 *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5298 break;
5299 case ARM64vecu_VMOVL16U:
5300 *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5301 break;
5302 case ARM64vecu_VMOVL32U:
5303 *p++ = X_9_1_6_4_6_1_1_4(X111100111, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5304 break;
5305 case ARM64vecu_VMOVL8S:
5306 *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X001000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5307 break;
5308 case ARM64vecu_VMOVL16S:
5309 *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X010000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5310 break;
5311 case ARM64vecu_VMOVL32S:
5312 *p++ = X_9_1_6_4_6_1_1_4(X111100101, vD >> 4, X100000, vD & 0xFU, X101000, vN >> 4, 1, vN & 0xFU);
5313 break;
5314 case ARM64vecu_NOT:
5315 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
5316 break;
5317 case ARM64vecu_CNT:
5318 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
5319 break;
5320 case ARM64vecu_UADDLV8x16:
5321 *p++ = X_3_8_5_6_5_5(X011, X01110001, X10000, X001110, vN, vD);
5322 break;
5323 case ARM64vecu_UADDLV16x8:
5324 *p++ = X_3_8_5_6_5_5(X011, X01110011, X10000, X001110, vN, vD);
5325 break;
5326 case ARM64vecu_UADDLV32x4:
5327 *p++ = X_3_8_5_6_5_5(X011, X01110101, X10000, X001110, vN, vD);
5328 break;
5329 case ARM64vecu_SADDLV8x16:
5330 *p++ = X_3_8_5_6_5_5(X010, X01110001, X10000, X001110, vN, vD);
5331 break;
5332 case ARM64vecu_SADDLV16x8:
5333 *p++ = X_3_8_5_6_5_5(X010, X01110011, X10000, X001110, vN, vD);
5334 break;
5335 case ARM64vecu_SADDLV32x4:
5336 *p++ = X_3_8_5_6_5_5(X010, X01110101, X10000, X001110, vN, vD);
5337 break;
5338 default:
5339 goto bad;
5340 }
5341 goto done;
5342 }
5343 case ARM64in_VNarrowV: {
5344 /* 31 23 21 15 9 4
5345 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5346 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5347 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5348 */
5349 UInt vD = qregNo(i->ARM64in.VNarrowV.dst);
5350 UInt vN = qregNo(i->ARM64in.VNarrowV.src);
5351 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
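/* dszBlg2 is log2 of the destination lane size in bytes: 0 -> .8b (from
   .8h), 1 -> .4h (from .4s), 2 -> .2s (from .2d).  It is shifted into
   the size field at insn bits 23:22. */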
5352 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
5353 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5354 X00001, X001010, vN, vD);
5355 goto done;
5356 }
5357 case ARM64in_VShiftImmV: {
5358 /*
5359 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5360 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5361 where immh:immb
5362 = case T of
5363 2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
5364 4s | sh in 1..31 -> let xxxxx = 32-sh in 01xx:xxx
5365 8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
5366 16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx
5367
5368 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5369 where immh:immb
5370 = case T of
5371 2d | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
5372 4s | sh in 1..31 -> let xxxxx = sh in 01xx:xxx
5373 8h | sh in 1..15 -> let xxxx = sh in 001x:xxx
5374 16b | sh in 1..7 -> let xxx = sh in 0001:xxx
5375 */
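/* Worked example of the immh:immb arithmetic below: USHR Vd.4s, Vn.4s, #3
   needs immh:immb = 64-3 = 0b0111101, i.e. X0100000 | (32-3), whereas
   SHL Vd.4s, Vn.4s, #3 needs immh:immb = 32+3 = 0b0100011,
   i.e. X0100000 | 3. */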
5376 UInt vD = qregNo(i->ARM64in.VShiftImmV.dst);
5377 UInt vN = qregNo(i->ARM64in.VShiftImmV.src);
5378 UInt sh = i->ARM64in.VShiftImmV.amt;
5379 ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op;
5380 Bool syned = False;
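/* The SSHR cases set syned and fall through to the corresponding USHR
   case, which then emits either the signed or the unsigned form
   depending on syned. */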
5381 switch (op) {
5382 /* 64x2 cases */
5383 case ARM64vecsh_SSHR64x2: syned = True;
5384 case ARM64vecsh_USHR64x2: /* fallthrough */
5385 if (sh >= 1 && sh <= 63) {
5386 UInt xxxxxx = 64-sh;
5387 *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
5388 X1000000 | xxxxxx, X000001, vN, vD);
5389 goto done;
5390 }
5391 break;
5392 case ARM64vecsh_SHL64x2:
5393 if (sh >= 1 && sh <= 63) {
5394 UInt xxxxxx = sh;
5395 *p++ = X_3_6_7_6_5_5(X010, X011110,
5396 X1000000 | xxxxxx, X010101, vN, vD);
5397 goto done;
5398 }
5399 break;
5400 /* 32x4 cases */
5401 case ARM64vecsh_SSHR32x4: syned = True;
5402 case ARM64vecsh_USHR32x4: /* fallthrough */
5403 if (sh >= 1 && sh <= 31) {
5404 UInt xxxxx = 32-sh;
5405 *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
5406 X0100000 | xxxxx, X000001, vN, vD);
5407 goto done;
5408 }
5409 break;
5410 case ARM64vecsh_SHL32x4:
5411 if (sh >= 1 && sh <= 31) {
5412 UInt xxxxx = sh;
5413 *p++ = X_3_6_7_6_5_5(X010, X011110,
5414 X0100000 | xxxxx, X010101, vN, vD);
5415 goto done;
5416 }
5417 break;
5418 /* 16x8 cases */
5419 case ARM64vecsh_SSHR16x8: syned = True;
5420 case ARM64vecsh_USHR16x8: /* fallthrough */
5421 if (sh >= 1 && sh <= 15) {
5422 UInt xxxx = 16-sh;
5423 *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
5424 X0010000 | xxxx, X000001, vN, vD);
5425 goto done;
5426 }
5427 break;
5428 case ARM64vecsh_SHL16x8:
5429 if (sh >= 1 && sh <= 15) {
5430 UInt xxxx = sh;
5431 *p++ = X_3_6_7_6_5_5(X010, X011110,
5432 X0010000 | xxxx, X010101, vN, vD);
5433 goto done;
5434 }
5435 break;
5436
5437
5438 /* 8x16 cases */
5439 case ARM64vecsh_SSHR8x16: syned = True;
5440 case ARM64vecsh_USHR8x16: /* fallthrough */
5441 if (sh >= 1 && sh <= 7) {
5442 UInt xxx = 8-sh;
5443 *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
5444 X0001000 | xxx, X000001, vN, vD);
5445 goto done;
5446 }
5447 break;
5448 case ARM64vecsh_SHL8x16:
5449 if (sh >= 1 && sh <= 7) {
5450 UInt xxx = sh;
5451 *p++ = X_3_6_7_6_5_5(X010, X011110,
5452 X0001000 | xxx, X010101, vN, vD);
5453 goto done;
5454 }
5455 break;
5456
5457 default:
5458 break;
5459 }
5460 goto bad;
5461 }
5462 //ZZ case ARMin_VAluS: {
5463 //ZZ UInt dN = fregNo(i->ARMin.VAluS.argL);
5464 //ZZ UInt dD = fregNo(i->ARMin.VAluS.dst);
5465 //ZZ UInt dM = fregNo(i->ARMin.VAluS.argR);
5466 //ZZ UInt bN = dN & 1;
5467 //ZZ UInt bD = dD & 1;
5468 //ZZ UInt bM = dM & 1;
5469 //ZZ UInt pqrs = X1111; /* undefined */
5470 //ZZ switch (i->ARMin.VAluS.op) {
5471 //ZZ case ARMvfp_ADD: pqrs = X0110; break;
5472 //ZZ case ARMvfp_SUB: pqrs = X0111; break;
5473 //ZZ case ARMvfp_MUL: pqrs = X0100; break;
5474 //ZZ case ARMvfp_DIV: pqrs = X1000; break;
5475 //ZZ default: goto bad;
5476 //ZZ }
5477 //ZZ vassert(pqrs != X1111);
5478 //ZZ UInt bP = (pqrs >> 3) & 1;
5479 //ZZ UInt bQ = (pqrs >> 2) & 1;
5480 //ZZ UInt bR = (pqrs >> 1) & 1;
5481 //ZZ UInt bS = (pqrs >> 0) & 1;
5482 //ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
5483 //ZZ (dN >> 1), (dD >> 1),
5484 //ZZ X1010, BITS4(bN,bS,bM,0), (dM >> 1));
5485 //ZZ *p++ = insn;
5486 //ZZ goto done;
5487 //ZZ }
5488 //ZZ case ARMin_VUnaryS: {
5489 //ZZ UInt fD = fregNo(i->ARMin.VUnaryS.dst);
5490 //ZZ UInt fM = fregNo(i->ARMin.VUnaryS.src);
5491 //ZZ UInt insn = 0;
5492 //ZZ switch (i->ARMin.VUnaryS.op) {
5493 //ZZ case ARMvfpu_COPY:
5494 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
5495 //ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
5496 //ZZ (fM >> 1));
5497 //ZZ break;
5498 //ZZ case ARMvfpu_ABS:
5499 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
5500 //ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
5501 //ZZ (fM >> 1));
5502 //ZZ break;
5503 //ZZ case ARMvfpu_NEG:
5504 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
5505 //ZZ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
5506 //ZZ (fM >> 1));
5507 //ZZ break;
5508 //ZZ case ARMvfpu_SQRT:
5509 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
5510 //ZZ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
5511 //ZZ (fM >> 1));
5512 //ZZ break;
5513 //ZZ default:
5514 //ZZ goto bad;
5515 //ZZ }
5516 //ZZ *p++ = insn;
5517 //ZZ goto done;
5518 //ZZ }
5519 //ZZ case ARMin_VCMovD: {
5520 //ZZ UInt cc = (UInt)i->ARMin.VCMovD.cond;
5521 //ZZ UInt dD = dregNo(i->ARMin.VCMovD.dst);
5522 //ZZ UInt dM = dregNo(i->ARMin.VCMovD.src);
5523 //ZZ vassert(cc < 16 && cc != ARMcc_AL);
5524 //ZZ UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
5525 //ZZ *p++ = insn;
5526 //ZZ goto done;
5527 //ZZ }
5528 //ZZ case ARMin_VCMovS: {
5529 //ZZ UInt cc = (UInt)i->ARMin.VCMovS.cond;
5530 //ZZ UInt fD = fregNo(i->ARMin.VCMovS.dst);
5531 //ZZ UInt fM = fregNo(i->ARMin.VCMovS.src);
5532 //ZZ vassert(cc < 16 && cc != ARMcc_AL);
5533 //ZZ UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
5534 //ZZ X0000,(fD >> 1),X1010,
5535 //ZZ BITS4(0,1,(fM & 1),0), (fM >> 1));
5536 //ZZ *p++ = insn;
5537 //ZZ goto done;
5538 //ZZ }
5539 //ZZ case ARMin_VXferD: {
5540 //ZZ UInt dD = dregNo(i->ARMin.VXferD.dD);
5541 //ZZ UInt rHi = iregNo(i->ARMin.VXferD.rHi);
5542 //ZZ UInt rLo = iregNo(i->ARMin.VXferD.rLo);
5543 //ZZ /* vmov dD, rLo, rHi is
5544 //ZZ E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
5545 //ZZ vmov rLo, rHi, dD is
5546 //ZZ E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
5547 //ZZ */
5548 //ZZ UInt insn
5549 //ZZ = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
5550 //ZZ rHi, rLo, 0xB,
5551 //ZZ BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
5552 //ZZ *p++ = insn;
5553 //ZZ goto done;
5554 //ZZ }
5555 //ZZ case ARMin_VXferS: {
5556 //ZZ UInt fD = fregNo(i->ARMin.VXferS.fD);
5557 //ZZ UInt rLo = iregNo(i->ARMin.VXferS.rLo);
5558 //ZZ /* vmov fD, rLo is
5559 //ZZ E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
5560 //ZZ vmov rLo, fD is
5561 //ZZ E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
5562 //ZZ */
5563 //ZZ UInt insn
5564 //ZZ = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
5565 //ZZ (fD >> 1) & 0xF, rLo, 0xA,
5566 //ZZ BITS4((fD & 1),0,0,1), 0);
5567 //ZZ *p++ = insn;
5568 //ZZ goto done;
5569 //ZZ }
5570 //ZZ case ARMin_VCvtID: {
5571 //ZZ Bool iToD = i->ARMin.VCvtID.iToD;
5572 //ZZ Bool syned = i->ARMin.VCvtID.syned;
5573 //ZZ if (iToD && syned) {
5574 //ZZ // FSITOD: I32S-in-freg to F64-in-dreg
5575 //ZZ UInt regF = fregNo(i->ARMin.VCvtID.src);
5576 //ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst);
5577 //ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
5578 //ZZ X1011, BITS4(1,1,(regF & 1),0),
5579 //ZZ (regF >> 1) & 0xF);
5580 //ZZ *p++ = insn;
5581 //ZZ goto done;
5582 //ZZ }
5583 //ZZ if (iToD && (!syned)) {
5584 //ZZ // FUITOD: I32U-in-freg to F64-in-dreg
5585 //ZZ UInt regF = fregNo(i->ARMin.VCvtID.src);
5586 //ZZ UInt regD = dregNo(i->ARMin.VCvtID.dst);
5587 //ZZ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
5588 //ZZ X1011, BITS4(0,1,(regF & 1),0),
5589 //ZZ (regF >> 1) & 0xF);
5590 //ZZ *p++ = insn;
5591 //ZZ goto done;
5592 //ZZ }
5593 //ZZ if ((!iToD) && syned) {
5594 //ZZ // FTOSID: F64-in-dreg to I32S-in-freg
5595 //ZZ UInt regD = dregNo(i->ARMin.VCvtID.src);
5596 //ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst);
5597 //ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
5598 //ZZ X1101, (regF >> 1) & 0xF,
5599 //ZZ X1011, X0100, regD);
5600 //ZZ *p++ = insn;
5601 //ZZ goto done;
5602 //ZZ }
5603 //ZZ if ((!iToD) && (!syned)) {
5604 //ZZ // FTOUID: F64-in-dreg to I32U-in-freg
5605 //ZZ UInt regD = dregNo(i->ARMin.VCvtID.src);
5606 //ZZ UInt regF = fregNo(i->ARMin.VCvtID.dst);
5607 //ZZ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
5608 //ZZ X1100, (regF >> 1) & 0xF,
5609 //ZZ X1011, X0100, regD);
5610 //ZZ *p++ = insn;
5611 //ZZ goto done;
5612 //ZZ }
5613 //ZZ /*UNREACHED*/
5614 //ZZ vassert(0);
5615 //ZZ }
5616 //ZZ case ARMin_NLdStD: {
5617 //ZZ UInt regD = dregNo(i->ARMin.NLdStD.dD);
5618 //ZZ UInt regN, regM;
5619 //ZZ UInt D = regD >> 4;
5620 //ZZ UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
5621 //ZZ UInt insn;
5622 //ZZ vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
5623 //ZZ regD &= 0xF;
5624 //ZZ if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
5625 //ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
5626 //ZZ regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
5627 //ZZ } else {
5628 //ZZ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
5629 //ZZ regM = 15;
5630 //ZZ }
5631 //ZZ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
5632 //ZZ regN, regD, X0111, X1000, regM);
5633 //ZZ *p++ = insn;
5634 //ZZ goto done;
5635 //ZZ }
5636 //ZZ case ARMin_NUnaryS: {
5637 //ZZ UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
5638 //ZZ UInt regD, D;
5639 //ZZ UInt regM, M;
5640 //ZZ UInt size = i->ARMin.NUnaryS.size;
5641 //ZZ UInt insn;
5642 //ZZ UInt opc, opc1, opc2;
5643 //ZZ switch (i->ARMin.NUnaryS.op) {
5644 //ZZ case ARMneon_VDUP:
5645 //ZZ if (i->ARMin.NUnaryS.size >= 16)
5646 //ZZ goto bad;
5647 //ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
5648 //ZZ goto bad;
5649 //ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
5650 //ZZ goto bad;
5651 //ZZ regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
5652 //ZZ ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
5653 //ZZ : dregNo(i->ARMin.NUnaryS.dst->reg);
5654 //ZZ regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
5655 //ZZ ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
5656 //ZZ : dregNo(i->ARMin.NUnaryS.src->reg);
5657 //ZZ D = regD >> 4;
5658 //ZZ M = regM >> 4;
5659 //ZZ regD &= 0xf;
5660 //ZZ regM &= 0xf;
5661 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
5662 //ZZ (i->ARMin.NUnaryS.size & 0xf), regD,
5663 //ZZ X1100, BITS4(0,Q,M,0), regM);
5664 //ZZ *p++ = insn;
5665 //ZZ goto done;
5666 //ZZ case ARMneon_SETELEM:
5667 //ZZ regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
5668 //ZZ dregNo(i->ARMin.NUnaryS.dst->reg);
5669 //ZZ regM = iregNo(i->ARMin.NUnaryS.src->reg);
5670 //ZZ M = regM >> 4;
5671 //ZZ D = regD >> 4;
5672 //ZZ regM &= 0xF;
5673 //ZZ regD &= 0xF;
5674 //ZZ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
5675 //ZZ goto bad;
5676 //ZZ switch (size) {
5677 //ZZ case 0:
5678 //ZZ if (i->ARMin.NUnaryS.dst->index > 7)
5679 //ZZ goto bad;
5680 //ZZ opc = X1000 | i->ARMin.NUnaryS.dst->index;
5681 //ZZ break;
5682 //ZZ case 1:
5683 //ZZ if (i->ARMin.NUnaryS.dst->index > 3)
5684 //ZZ goto bad;
5685 //ZZ opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
5686 //ZZ break;
5687 //ZZ case 2:
5688 //ZZ if (i->ARMin.NUnaryS.dst->index > 1)
5689 //ZZ goto bad;
5690 //ZZ opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
5691 //ZZ break;
5692 //ZZ default:
5693 //ZZ goto bad;
5694 //ZZ }
5695 //ZZ opc1 = (opc >> 2) & 3;
5696 //ZZ opc2 = opc & 3;
5697 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
5698 //ZZ regD, regM, X1011,
5699 //ZZ BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
5700 //ZZ *p++ = insn;
5701 //ZZ goto done;
5702 //ZZ case ARMneon_GETELEMU:
5703 //ZZ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
5704 //ZZ dregNo(i->ARMin.NUnaryS.src->reg);
5705 //ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
5706 //ZZ M = regM >> 4;
5707 //ZZ D = regD >> 4;
5708 //ZZ regM &= 0xF;
5709 //ZZ regD &= 0xF;
5710 //ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
5711 //ZZ goto bad;
5712 //ZZ switch (size) {
5713 //ZZ case 0:
5714 //ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) {
5715 //ZZ regM++;
5716 //ZZ i->ARMin.NUnaryS.src->index -= 8;
5717 //ZZ }
5718 //ZZ if (i->ARMin.NUnaryS.src->index > 7)
5719 //ZZ goto bad;
5720 //ZZ opc = X1000 | i->ARMin.NUnaryS.src->index;
5721 //ZZ break;
5722 //ZZ case 1:
5723 //ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) {
5724 //ZZ regM++;
5725 //ZZ i->ARMin.NUnaryS.src->index -= 4;
5726 //ZZ }
5727 //ZZ if (i->ARMin.NUnaryS.src->index > 3)
5728 //ZZ goto bad;
5729 //ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
5730 //ZZ break;
5731 //ZZ case 2:
5732 //ZZ goto bad;
5733 //ZZ default:
5734 //ZZ goto bad;
5735 //ZZ }
5736 //ZZ opc1 = (opc >> 2) & 3;
5737 //ZZ opc2 = opc & 3;
5738 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
5739 //ZZ regM, regD, X1011,
5740 //ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
5741 //ZZ *p++ = insn;
5742 //ZZ goto done;
5743 //ZZ case ARMneon_GETELEMS:
5744 //ZZ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
5745 //ZZ dregNo(i->ARMin.NUnaryS.src->reg);
5746 //ZZ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
5747 //ZZ M = regM >> 4;
5748 //ZZ D = regD >> 4;
5749 //ZZ regM &= 0xF;
5750 //ZZ regD &= 0xF;
5751 //ZZ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
5752 //ZZ goto bad;
5753 //ZZ switch (size) {
5754 //ZZ case 0:
5755 //ZZ if (Q && i->ARMin.NUnaryS.src->index > 7) {
5756 //ZZ regM++;
5757 //ZZ i->ARMin.NUnaryS.src->index -= 8;
5758 //ZZ }
5759 //ZZ if (i->ARMin.NUnaryS.src->index > 7)
5760 //ZZ goto bad;
5761 //ZZ opc = X1000 | i->ARMin.NUnaryS.src->index;
5762 //ZZ break;
5763 //ZZ case 1:
5764 //ZZ if (Q && i->ARMin.NUnaryS.src->index > 3) {
5765 //ZZ regM++;
5766 //ZZ i->ARMin.NUnaryS.src->index -= 4;
5767 //ZZ }
5768 //ZZ if (i->ARMin.NUnaryS.src->index > 3)
5769 //ZZ goto bad;
5770 //ZZ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
5771 //ZZ break;
5772 //ZZ case 2:
5773 //ZZ if (Q && i->ARMin.NUnaryS.src->index > 1) {
5774 //ZZ regM++;
5775 //ZZ i->ARMin.NUnaryS.src->index -= 2;
5776 //ZZ }
5777 //ZZ if (i->ARMin.NUnaryS.src->index > 1)
5778 //ZZ goto bad;
5779 //ZZ opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
5780 //ZZ break;
5781 //ZZ default:
5782 //ZZ goto bad;
5783 //ZZ }
5784 //ZZ opc1 = (opc >> 2) & 3;
5785 //ZZ opc2 = opc & 3;
5786 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
5787 //ZZ regM, regD, X1011,
5788 //ZZ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
5789 //ZZ *p++ = insn;
5790 //ZZ goto done;
5791 //ZZ default:
5792 //ZZ goto bad;
5793 //ZZ }
5794 //ZZ }
5795 //ZZ case ARMin_NUnary: {
5796 //ZZ UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
5797 //ZZ UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
5798 //ZZ ? (qregNo(i->ARMin.NUnary.dst) << 1)
5799 //ZZ : dregNo(i->ARMin.NUnary.dst);
5800 //ZZ UInt regM, M;
5801 //ZZ UInt D = regD >> 4;
5802 //ZZ UInt sz1 = i->ARMin.NUnary.size >> 1;
5803 //ZZ UInt sz2 = i->ARMin.NUnary.size & 1;
5804 //ZZ UInt sz = i->ARMin.NUnary.size;
5805 //ZZ UInt insn;
5806 //ZZ UInt F = 0; /* TODO: floating point EQZ ??? */
5807 //ZZ if (i->ARMin.NUnary.op != ARMneon_DUP) {
5808 //ZZ regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
5809 //ZZ ? (qregNo(i->ARMin.NUnary.src) << 1)
5810 //ZZ : dregNo(i->ARMin.NUnary.src);
5811 //ZZ M = regM >> 4;
5812 //ZZ } else {
5813 //ZZ regM = iregNo(i->ARMin.NUnary.src);
5814 //ZZ M = regM >> 4;
5815 //ZZ }
5816 //ZZ regD &= 0xF;
5817 //ZZ regM &= 0xF;
5818 //ZZ switch (i->ARMin.NUnary.op) {
5819 //ZZ case ARMneon_COPY: /* VMOV reg, reg */
5820 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
5821 //ZZ BITS4(M,Q,M,1), regM);
5822 //ZZ break;
5823 //ZZ case ARMneon_COPYN: /* VMOVN regD, regQ */
5824 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
5825 //ZZ regD, X0010, BITS4(0,0,M,0), regM);
5826 //ZZ break;
5827 //ZZ case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
5828 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
5829 //ZZ regD, X0010, BITS4(1,0,M,0), regM);
5830 //ZZ break;
5831 //ZZ case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
5832 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
5833 //ZZ regD, X0010, BITS4(0,1,M,0), regM);
5834 //ZZ break;
5835 //ZZ case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
5836 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
5837 //ZZ regD, X0010, BITS4(1,1,M,0), regM);
5838 //ZZ break;
5839 //ZZ case ARMneon_COPYLS: /* VMOVL regQ, regD */
5840 //ZZ if (sz >= 3)
5841 //ZZ goto bad;
5842 //ZZ insn = XXXXXXXX(0xF, X0010,
5843 //ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
5844 //ZZ BITS4((sz == 0) ? 1 : 0,0,0,0),
5845 //ZZ regD, X1010, BITS4(0,0,M,1), regM);
5846 //ZZ break;
5847 //ZZ case ARMneon_COPYLU: /* VMOVL regQ, regD */
5848 //ZZ if (sz >= 3)
5849 //ZZ goto bad;
5850 //ZZ insn = XXXXXXXX(0xF, X0011,
5851 //ZZ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
5852 //ZZ BITS4((sz == 0) ? 1 : 0,0,0,0),
5853 //ZZ regD, X1010, BITS4(0,0,M,1), regM);
5854 //ZZ break;
5855 //ZZ case ARMneon_NOT: /* VMVN reg, reg*/
5856 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
5857 //ZZ BITS4(1,Q,M,0), regM);
5858 //ZZ break;
5859 //ZZ case ARMneon_EQZ:
5860 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
5861 //ZZ regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
5862 //ZZ break;
5863 //ZZ case ARMneon_CNT:
5864 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
5865 //ZZ BITS4(0,Q,M,0), regM);
5866 //ZZ break;
5867 //ZZ case ARMneon_CLZ:
5868 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5869 //ZZ regD, X0100, BITS4(1,Q,M,0), regM);
5870 //ZZ break;
5871 //ZZ case ARMneon_CLS:
5872 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5873 //ZZ regD, X0100, BITS4(0,Q,M,0), regM);
5874 //ZZ break;
5875 //ZZ case ARMneon_ABS:
5876 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
5877 //ZZ regD, X0011, BITS4(0,Q,M,0), regM);
5878 //ZZ break;
5879 //ZZ case ARMneon_DUP:
5880 //ZZ sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
5881 //ZZ sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
5882 //ZZ vassert(sz1 + sz2 < 2);
5883 //ZZ insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
5884 //ZZ X1011, BITS4(D,0,sz2,1), X0000);
5885 //ZZ break;
5886 //ZZ case ARMneon_REV16:
5887 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5888 //ZZ regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
5889 //ZZ break;
5890 //ZZ case ARMneon_REV32:
5891 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5892 //ZZ regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
5893 //ZZ break;
5894 //ZZ case ARMneon_REV64:
5895 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5896 //ZZ regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
5897 //ZZ break;
5898 //ZZ case ARMneon_PADDLU:
5899 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5900 //ZZ regD, X0010, BITS4(1,Q,M,0), regM);
5901 //ZZ break;
5902 //ZZ case ARMneon_PADDLS:
5903 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
5904 //ZZ regD, X0010, BITS4(0,Q,M,0), regM);
5905 //ZZ break;
5906 //ZZ case ARMneon_VQSHLNUU:
5907 //ZZ insn = XXXXXXXX(0xF, X0011,
5908 //ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
5909 //ZZ sz & 0xf, regD, X0111,
5910 //ZZ BITS4(sz >> 6,Q,M,1), regM);
5911 //ZZ break;
5912 //ZZ case ARMneon_VQSHLNSS:
5913 //ZZ insn = XXXXXXXX(0xF, X0010,
5914 //ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
5915 //ZZ sz & 0xf, regD, X0111,
5916 //ZZ BITS4(sz >> 6,Q,M,1), regM);
5917 //ZZ break;
5918 //ZZ case ARMneon_VQSHLNUS:
5919 //ZZ insn = XXXXXXXX(0xF, X0011,
5920 //ZZ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
5921 //ZZ sz & 0xf, regD, X0110,
5922 //ZZ BITS4(sz >> 6,Q,M,1), regM);
5923 //ZZ break;
5924 //ZZ case ARMneon_VCVTFtoS:
5925 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
5926 //ZZ BITS4(0,Q,M,0), regM);
5927 //ZZ break;
5928 //ZZ case ARMneon_VCVTFtoU:
5929 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
5930 //ZZ BITS4(1,Q,M,0), regM);
5931 //ZZ break;
5932 //ZZ case ARMneon_VCVTStoF:
5933 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
5934 //ZZ BITS4(0,Q,M,0), regM);
5935 //ZZ break;
5936 //ZZ case ARMneon_VCVTUtoF:
5937 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
5938 //ZZ BITS4(1,Q,M,0), regM);
5939 //ZZ break;
5940 //ZZ case ARMneon_VCVTFtoFixedU:
5941 //ZZ sz1 = (sz >> 5) & 1;
5942 //ZZ sz2 = (sz >> 4) & 1;
5943 //ZZ sz &= 0xf;
5944 //ZZ insn = XXXXXXXX(0xF, X0011,
5945 //ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111,
5946 //ZZ BITS4(0,Q,M,1), regM);
5947 //ZZ break;
5948 //ZZ case ARMneon_VCVTFtoFixedS:
5949 //ZZ sz1 = (sz >> 5) & 1;
5950 //ZZ sz2 = (sz >> 4) & 1;
5951 //ZZ sz &= 0xf;
5952 //ZZ insn = XXXXXXXX(0xF, X0010,
5953 //ZZ BITS4(1,D,sz1,sz2), sz, regD, X1111,
5954 //ZZ BITS4(0,Q,M,1), regM);
5955 //ZZ break;
5956 //ZZ case ARMneon_VCVTFixedUtoF:
5957 //ZZ sz1 = (sz >> 5) & 1;
5958 //ZZ sz2 = (sz >> 4) & 1;
5959 //ZZ sz &= 0xf;
5960 //ZZ insn = XXXXXXXX(0xF, X0011,
5961 //ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110,
5962 //ZZ BITS4(0,Q,M,1), regM);
5963 //ZZ break;
5964 //ZZ case ARMneon_VCVTFixedStoF:
5965 //ZZ sz1 = (sz >> 5) & 1;
5966 //ZZ sz2 = (sz >> 4) & 1;
5967 //ZZ sz &= 0xf;
5968 //ZZ insn = XXXXXXXX(0xF, X0010,
5969 //ZZ BITS4(1,D,sz1,sz2), sz, regD, X1110,
5970 //ZZ BITS4(0,Q,M,1), regM);
5971 //ZZ break;
5972 //ZZ case ARMneon_VCVTF32toF16:
5973 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
5974 //ZZ BITS4(0,0,M,0), regM);
5975 //ZZ break;
5976 //ZZ case ARMneon_VCVTF16toF32:
5977 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
5978 //ZZ BITS4(0,0,M,0), regM);
5979 //ZZ break;
5980 //ZZ case ARMneon_VRECIP:
5981 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
5982 //ZZ BITS4(0,Q,M,0), regM);
5983 //ZZ break;
5984 //ZZ case ARMneon_VRECIPF:
5985 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
5986 //ZZ BITS4(0,Q,M,0), regM);
5987 //ZZ break;
5988 //ZZ case ARMneon_VABSFP:
5989 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
5990 //ZZ BITS4(0,Q,M,0), regM);
5991 //ZZ break;
5992 //ZZ case ARMneon_VRSQRTEFP:
5993 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
5994 //ZZ BITS4(1,Q,M,0), regM);
5995 //ZZ break;
5996 //ZZ case ARMneon_VRSQRTE:
5997 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
5998 //ZZ BITS4(1,Q,M,0), regM);
5999 //ZZ break;
6000 //ZZ case ARMneon_VNEGF:
6001 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
6002 //ZZ BITS4(1,Q,M,0), regM);
6003 //ZZ break;
6004 //ZZ
6005 //ZZ default:
6006 //ZZ goto bad;
6007 //ZZ }
6008 //ZZ *p++ = insn;
6009 //ZZ goto done;
6010 //ZZ }
6011 //ZZ case ARMin_NDual: {
6012 //ZZ UInt Q = i->ARMin.NDual.Q ? 1 : 0;
6013 //ZZ UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
6014 //ZZ ? (qregNo(i->ARMin.NDual.arg1) << 1)
6015 //ZZ : dregNo(i->ARMin.NDual.arg1);
6016 //ZZ UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
6017 //ZZ ? (qregNo(i->ARMin.NDual.arg2) << 1)
6018 //ZZ : dregNo(i->ARMin.NDual.arg2);
6019 //ZZ UInt D = regD >> 4;
6020 //ZZ UInt M = regM >> 4;
6021 //ZZ UInt sz1 = i->ARMin.NDual.size >> 1;
6022 //ZZ UInt sz2 = i->ARMin.NDual.size & 1;
6023 //ZZ UInt insn;
6024 //ZZ regD &= 0xF;
6025 //ZZ regM &= 0xF;
6026 //ZZ switch (i->ARMin.NDual.op) {
6027 //ZZ case ARMneon_TRN: /* VTRN reg, reg */
6028 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
6029 //ZZ regD, X0000, BITS4(1,Q,M,0), regM);
6030 //ZZ break;
6031 //ZZ case ARMneon_ZIP: /* VZIP reg, reg */
6032 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
6033 //ZZ regD, X0001, BITS4(1,Q,M,0), regM);
6034 //ZZ break;
6035 //ZZ case ARMneon_UZP: /* VUZP reg, reg */
6036 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
6037 //ZZ regD, X0001, BITS4(0,Q,M,0), regM);
6038 //ZZ break;
6039 //ZZ default:
6040 //ZZ goto bad;
6041 //ZZ }
6042 //ZZ *p++ = insn;
6043 //ZZ goto done;
6044 //ZZ }
6045 //ZZ case ARMin_NBinary: {
6046 //ZZ UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
6047 //ZZ UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
6048 //ZZ ? (qregNo(i->ARMin.NBinary.dst) << 1)
6049 //ZZ : dregNo(i->ARMin.NBinary.dst);
6050 //ZZ UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
6051 //ZZ ? (qregNo(i->ARMin.NBinary.argL) << 1)
6052 //ZZ : dregNo(i->ARMin.NBinary.argL);
6053 //ZZ UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
6054 //ZZ ? (qregNo(i->ARMin.NBinary.argR) << 1)
6055 //ZZ : dregNo(i->ARMin.NBinary.argR);
6056 //ZZ UInt sz1 = i->ARMin.NBinary.size >> 1;
6057 //ZZ UInt sz2 = i->ARMin.NBinary.size & 1;
6058 //ZZ UInt D = regD >> 4;
6059 //ZZ UInt N = regN >> 4;
6060 //ZZ UInt M = regM >> 4;
6061 //ZZ UInt insn;
6062 //ZZ regD &= 0xF;
6063 //ZZ regM &= 0xF;
6064 //ZZ regN &= 0xF;
6065 //ZZ switch (i->ARMin.NBinary.op) {
6066 //ZZ case ARMneon_VAND: /* VAND reg, reg, reg */
6067 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
6068 //ZZ BITS4(N,Q,M,1), regM);
6069 //ZZ break;
6070 //ZZ case ARMneon_VORR: /* VORR reg, reg, reg*/
6071 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
6072 //ZZ BITS4(N,Q,M,1), regM);
6073 //ZZ break;
6074 //ZZ case ARMneon_VXOR: /* VEOR reg, reg, reg */
6075 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
6076 //ZZ BITS4(N,Q,M,1), regM);
6077 //ZZ break;
6078 //ZZ case ARMneon_VADD: /* VADD reg, reg, reg */
6079 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6080 //ZZ X1000, BITS4(N,Q,M,0), regM);
6081 //ZZ break;
6082 //ZZ case ARMneon_VSUB: /* VSUB reg, reg, reg */
6083 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6084 //ZZ X1000, BITS4(N,Q,M,0), regM);
6085 //ZZ break;
6086 //ZZ case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
6087 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6088 //ZZ X0110, BITS4(N,Q,M,1), regM);
6089 //ZZ break;
6090 //ZZ case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
6091 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6092 //ZZ X0110, BITS4(N,Q,M,1), regM);
6093 //ZZ break;
6094 //ZZ case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
6095 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6096 //ZZ X0110, BITS4(N,Q,M,0), regM);
6097 //ZZ break;
6098 //ZZ case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
6099 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6100 //ZZ X0110, BITS4(N,Q,M,0), regM);
6101 //ZZ break;
6102 //ZZ case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
6103 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6104 //ZZ X0001, BITS4(N,Q,M,0), regM);
6105 //ZZ break;
6106 //ZZ case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
6107 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6108 //ZZ X0001, BITS4(N,Q,M,0), regM);
6109 //ZZ break;
6110 //ZZ case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
6111 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6112 //ZZ X0000, BITS4(N,Q,M,1), regM);
6113 //ZZ break;
6114 //ZZ case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
6115 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6116 //ZZ X0000, BITS4(N,Q,M,1), regM);
6117 //ZZ break;
6118 //ZZ case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
6119 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6120 //ZZ X0010, BITS4(N,Q,M,1), regM);
6121 //ZZ break;
6122 //ZZ case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
6123 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6124 //ZZ X0010, BITS4(N,Q,M,1), regM);
6125 //ZZ break;
6126 //ZZ case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
6127 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6128 //ZZ X0011, BITS4(N,Q,M,0), regM);
6129 //ZZ break;
6130 //ZZ case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
6131 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6132 //ZZ X0011, BITS4(N,Q,M,0), regM);
6133 //ZZ break;
6134 //ZZ case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
6135 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6136 //ZZ X0011, BITS4(N,Q,M,1), regM);
6137 //ZZ break;
6138 //ZZ case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
6139 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6140 //ZZ X0011, BITS4(N,Q,M,1), regM);
6141 //ZZ break;
6142 //ZZ case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
6143 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6144 //ZZ X1000, BITS4(N,Q,M,1), regM);
6145 //ZZ break;
6146 //ZZ case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
6147 //ZZ if (i->ARMin.NBinary.size >= 16)
6148 //ZZ goto bad;
6149 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
6150 //ZZ i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
6151 //ZZ regM);
6152 //ZZ break;
6153 //ZZ case ARMneon_VMUL:
6154 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6155 //ZZ X1001, BITS4(N,Q,M,1), regM);
6156 //ZZ break;
6157 //ZZ case ARMneon_VMULLU:
6158 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
6159 //ZZ X1100, BITS4(N,0,M,0), regM);
6160 //ZZ break;
6161 //ZZ case ARMneon_VMULLS:
6162 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
6163 //ZZ X1100, BITS4(N,0,M,0), regM);
6164 //ZZ break;
6165 //ZZ case ARMneon_VMULP:
6166 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6167 //ZZ X1001, BITS4(N,Q,M,1), regM);
6168 //ZZ break;
6169 //ZZ case ARMneon_VMULFP:
6170 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
6171 //ZZ X1101, BITS4(N,Q,M,1), regM);
6172 //ZZ break;
6173 //ZZ case ARMneon_VMULLP:
6174 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
6175 //ZZ X1110, BITS4(N,0,M,0), regM);
6176 //ZZ break;
6177 //ZZ case ARMneon_VQDMULH:
6178 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6179 //ZZ X1011, BITS4(N,Q,M,0), regM);
6180 //ZZ break;
6181 //ZZ case ARMneon_VQRDMULH:
6182 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6183 //ZZ X1011, BITS4(N,Q,M,0), regM);
6184 //ZZ break;
6185 //ZZ case ARMneon_VQDMULL:
6186 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
6187 //ZZ X1101, BITS4(N,0,M,0), regM);
6188 //ZZ break;
6189 //ZZ case ARMneon_VTBL:
6190 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
6191 //ZZ X1000, BITS4(N,0,M,0), regM);
6192 //ZZ break;
6193 //ZZ case ARMneon_VPADD:
6194 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6195 //ZZ X1011, BITS4(N,Q,M,1), regM);
6196 //ZZ break;
6197 //ZZ case ARMneon_VPADDFP:
6198 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
6199 //ZZ X1101, BITS4(N,Q,M,0), regM);
6200 //ZZ break;
6201 //ZZ case ARMneon_VPMINU:
6202 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6203 //ZZ X1010, BITS4(N,Q,M,1), regM);
6204 //ZZ break;
6205 //ZZ case ARMneon_VPMINS:
6206 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6207 //ZZ X1010, BITS4(N,Q,M,1), regM);
6208 //ZZ break;
6209 //ZZ case ARMneon_VPMAXU:
6210 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6211 //ZZ X1010, BITS4(N,Q,M,0), regM);
6212 //ZZ break;
6213 //ZZ case ARMneon_VPMAXS:
6214 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6215 //ZZ X1010, BITS4(N,Q,M,0), regM);
6216 //ZZ break;
6217 //ZZ case ARMneon_VADDFP: /* VADD reg, reg, reg */
6218 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
6219 //ZZ X1101, BITS4(N,Q,M,0), regM);
6220 //ZZ break;
6221 //ZZ case ARMneon_VSUBFP: /* VADD reg, reg, reg */
6222 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
6223 //ZZ X1101, BITS4(N,Q,M,0), regM);
6224 //ZZ break;
6225 //ZZ case ARMneon_VABDFP: /* VABD reg, reg, reg */
6226 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
6227 //ZZ X1101, BITS4(N,Q,M,0), regM);
6228 //ZZ break;
6229 //ZZ case ARMneon_VMINF:
6230 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
6231 //ZZ X1111, BITS4(N,Q,M,0), regM);
6232 //ZZ break;
6233 //ZZ case ARMneon_VMAXF:
6234 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
6235 //ZZ X1111, BITS4(N,Q,M,0), regM);
6236 //ZZ break;
6237 //ZZ case ARMneon_VPMINF:
6238 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
6239 //ZZ X1111, BITS4(N,Q,M,0), regM);
6240 //ZZ break;
6241 //ZZ case ARMneon_VPMAXF:
6242 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
6243 //ZZ X1111, BITS4(N,Q,M,0), regM);
6244 //ZZ break;
6245 //ZZ case ARMneon_VRECPS:
6246 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
6247 //ZZ BITS4(N,Q,M,1), regM);
6248 //ZZ break;
6249 //ZZ case ARMneon_VCGTF:
6250 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
6251 //ZZ BITS4(N,Q,M,0), regM);
6252 //ZZ break;
6253 //ZZ case ARMneon_VCGEF:
6254 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
6255 //ZZ BITS4(N,Q,M,0), regM);
6256 //ZZ break;
6257 //ZZ case ARMneon_VCEQF:
6258 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
6259 //ZZ BITS4(N,Q,M,0), regM);
6260 //ZZ break;
6261 //ZZ case ARMneon_VRSQRTS:
6262 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
6263 //ZZ BITS4(N,Q,M,1), regM);
6264 //ZZ break;
6265 //ZZ default:
6266 //ZZ goto bad;
6267 //ZZ }
6268 //ZZ *p++ = insn;
6269 //ZZ goto done;
6270 //ZZ }
6271 //ZZ case ARMin_NShift: {
6272 //ZZ UInt Q = i->ARMin.NShift.Q ? 1 : 0;
6273 //ZZ UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
6274 //ZZ ? (qregNo(i->ARMin.NShift.dst) << 1)
6275 //ZZ : dregNo(i->ARMin.NShift.dst);
6276 //ZZ UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
6277 //ZZ ? (qregNo(i->ARMin.NShift.argL) << 1)
6278 //ZZ : dregNo(i->ARMin.NShift.argL);
6279 //ZZ UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
6280 //ZZ ? (qregNo(i->ARMin.NShift.argR) << 1)
6281 //ZZ : dregNo(i->ARMin.NShift.argR);
6282 //ZZ UInt sz1 = i->ARMin.NShift.size >> 1;
6283 //ZZ UInt sz2 = i->ARMin.NShift.size & 1;
6284 //ZZ UInt D = regD >> 4;
6285 //ZZ UInt N = regN >> 4;
6286 //ZZ UInt M = regM >> 4;
6287 //ZZ UInt insn;
6288 //ZZ regD &= 0xF;
6289 //ZZ regM &= 0xF;
6290 //ZZ regN &= 0xF;
6291 //ZZ switch (i->ARMin.NShift.op) {
6292 //ZZ case ARMneon_VSHL:
6293 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6294 //ZZ X0100, BITS4(N,Q,M,0), regM);
6295 //ZZ break;
6296 //ZZ case ARMneon_VSAL:
6297 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6298 //ZZ X0100, BITS4(N,Q,M,0), regM);
6299 //ZZ break;
6300 //ZZ case ARMneon_VQSHL:
6301 //ZZ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
6302 //ZZ X0100, BITS4(N,Q,M,1), regM);
6303 //ZZ break;
6304 //ZZ case ARMneon_VQSAL:
6305 //ZZ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
6306 //ZZ X0100, BITS4(N,Q,M,1), regM);
6307 //ZZ break;
6308 //ZZ default:
6309 //ZZ goto bad;
6310 //ZZ }
6311 //ZZ *p++ = insn;
6312 //ZZ goto done;
6313 //ZZ }
6314 //ZZ case ARMin_NShl64: {
6315 //ZZ HReg regDreg = i->ARMin.NShl64.dst;
6316 //ZZ HReg regMreg = i->ARMin.NShl64.src;
6317 //ZZ UInt amt = i->ARMin.NShl64.amt;
6318 //ZZ vassert(amt >= 1 && amt <= 63);
6319 //ZZ vassert(hregClass(regDreg) == HRcFlt64);
6320 //ZZ vassert(hregClass(regMreg) == HRcFlt64);
6321 //ZZ UInt regD = dregNo(regDreg);
6322 //ZZ UInt regM = dregNo(regMreg);
6323 //ZZ UInt D = (regD >> 4) & 1;
6324 //ZZ UInt Vd = regD & 0xF;
6325 //ZZ UInt L = 1;
6326 //ZZ UInt Q = 0; /* always 64-bit */
6327 //ZZ UInt M = (regM >> 4) & 1;
6328 //ZZ UInt Vm = regM & 0xF;
6329 //ZZ UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
6330 //ZZ amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
6331 //ZZ *p++ = insn;
6332 //ZZ goto done;
6333 //ZZ }
6334 case ARM64in_VImmQ: {
6335 UInt rQ = qregNo(i->ARM64in.VImmQ.rQ);
6336 UShort imm = i->ARM64in.VImmQ.imm;
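/* imm is interpreted as a per-byte mask: bit k set means byte k of the
   128-bit result is 0xFF.  Only the handful of mask values below are
   handled at present. */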
6337 if (imm == 0x0000) {
6338 /* movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ */
6339 vassert(rQ < 32);
6340 *p++ = 0x4F000400 | rQ;
6341 goto done;
6342 }
6343 if (imm == 0x0001) {
6344 /* movi rD, #0xFF == 0x2F 0x00 0xE4 001 rD */
6345 vassert(rQ < 32);
6346 *p++ = 0x2F00E420 | rQ;
6347 goto done;
6348 }
6349 if (imm == 0x0003) {
6350 /* movi rD, #0xFFFF == 0x2F 0x00 0xE4 011 rD */
6351 vassert(rQ < 32);
6352 *p++ = 0x2F00E460 | rQ;
6353 goto done;
6354 }
6355 if (imm == 0x000F) {
6356 /* movi rD, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rD */
6357 vassert(rQ < 32);
6358 *p++ = 0x2F00E5E0 | rQ;
6359 goto done;
6360 }
6361 if (imm == 0x00FF) {
6362 /* movi rD, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rD */
6363 vassert(rQ < 32);
6364 *p++ = 0x2F07E7E0 | rQ;
6365 goto done;
6366 }
6367 goto bad; /* no other handled cases right now */
6368 }
6369
6370 case ARM64in_VDfromX: {
6371 /* INS Vd.D[0], rX
6372 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
6373 This isn't wonderful, in the sense that the upper half of
6374 the vector register stays unchanged and thus the insn is
6375 data dependent on its output register. */
6376 UInt dd = dregNo(i->ARM64in.VDfromX.rD);
6377 UInt xx = iregNo(i->ARM64in.VDfromX.rX);
6378 vassert(xx < 31);
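         /* 0x4E081C00 is already the complete INS Vd.D[0], Xn pattern
            from the comment above, with Rn = Rd = 0; X_2_6_2_12_5_5 here
            only ORs the two register numbers into bits 9:5 and 4:0. */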
6379 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
6380 goto done;
6381 }
6382
6383 case ARM64in_VQfromXX: {
6384 /* What we really generate is a two insn sequence:
6385 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
6386 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
6387 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
6388 */
6389 UInt qq = qregNo(i->ARM64in.VQfromXX.rQ);
6390 UInt xhi = iregNo(i->ARM64in.VQfromXX.rXhi);
6391 UInt xlo = iregNo(i->ARM64in.VQfromXX.rXlo);
6392 vassert(xhi < 31 && xlo < 31);
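         /* The two base words differ only in bit 20 -- the top bit of the
            imm5 field (<lane>1000 for a 64-bit element) -- so 0x4E081C00
            targets D[0] and 0x4E181C00 targets D[1], matching the comment
            above. */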
6393 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
6394 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
6395 goto done;
6396 }
6397
6398 case ARM64in_VXfromQ: {
6399 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
6400 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
6401 */
6402 UInt dd = iregNo(i->ARM64in.VXfromQ.rX);
6403 UInt nn = qregNo(i->ARM64in.VXfromQ.rQ);
6404 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
6405 vassert(dd < 31);
6406 vassert(laneNo < 2);
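         /* As with INS above, imm5 is <lane>1000 for a .D element, so the
            two UMOV encodings differ only in that lane-index bit; lane
            numbers other than 0 and 1 do not exist for .D and are ruled
            out by the vassert. */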
6407 *p++ = X_3_8_5_6_5_5(X010, X01110000,
6408 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
6409 goto done;
6410 }
6411
6412 case ARM64in_VMov: {
6413 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
6414 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
6415 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
6416 */
6417 HReg rD = i->ARM64in.VMov.dst;
6418 HReg rN = i->ARM64in.VMov.src;
6419 switch (i->ARM64in.VMov.szB) {
6420 case 8: {
6421 UInt dd = dregNo(rD);
6422 UInt nn = dregNo(rN);
6423 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
6424 goto done;
6425 }
6426 default:
6427 break;
6428 }
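         /* Only the 8-byte case (FMOV Dd, Dn) is emitted so far; the
            4-byte and 16-byte encodings listed in the comment above are
            not yet handled and fall through to 'bad'. */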
6429 goto bad;
6430 }
6431 //ZZ case ARMin_NeonImm: {
6432 //ZZ UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
6433 //ZZ UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
6434 //ZZ dregNo(i->ARMin.NeonImm.dst);
6435 //ZZ UInt D = regD >> 4;
6436 //ZZ UInt imm = i->ARMin.NeonImm.imm->imm8;
6437 //ZZ UInt tp = i->ARMin.NeonImm.imm->type;
6438 //ZZ UInt j = imm >> 7;
6439 //ZZ UInt imm3 = (imm >> 4) & 0x7;
6440 //ZZ UInt imm4 = imm & 0xF;
6441 //ZZ UInt cmode, op;
6442 //ZZ UInt insn;
6443 //ZZ regD &= 0xF;
6444 //ZZ if (tp == 9)
6445 //ZZ op = 1;
6446 //ZZ else
6447 //ZZ op = 0;
6448 //ZZ switch (tp) {
6449 //ZZ case 0:
6450 //ZZ case 1:
6451 //ZZ case 2:
6452 //ZZ case 3:
6453 //ZZ case 4:
6454 //ZZ case 5:
6455 //ZZ cmode = tp << 1;
6456 //ZZ break;
6457 //ZZ case 9:
6458 //ZZ case 6:
6459 //ZZ cmode = 14;
6460 //ZZ break;
6461 //ZZ case 7:
6462 //ZZ cmode = 12;
6463 //ZZ break;
6464 //ZZ case 8:
6465 //ZZ cmode = 13;
6466 //ZZ break;
6467 //ZZ case 10:
6468 //ZZ cmode = 15;
6469 //ZZ break;
6470 //ZZ default:
6471 //ZZ vpanic("ARMin_NeonImm");
6472 //ZZ
6473 //ZZ }
6474 //ZZ insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
6475 //ZZ cmode, BITS4(0,Q,op,1), imm4);
6476 //ZZ *p++ = insn;
6477 //ZZ goto done;
6478 //ZZ }
6479 //ZZ case ARMin_NCMovQ: {
6480 //ZZ UInt cc = (UInt)i->ARMin.NCMovQ.cond;
6481 //ZZ UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
6482 //ZZ UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
6483 //ZZ UInt vM = qM & 0xF;
6484 //ZZ UInt vD = qD & 0xF;
6485 //ZZ UInt M = (qM >> 4) & 1;
6486 //ZZ UInt D = (qD >> 4) & 1;
6487 //ZZ vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
6488 //ZZ /* b!cc here+8: !cc A00 0000 */
6489 //ZZ UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
6490 //ZZ *p++ = insn;
6491 //ZZ /* vmov qD, qM */
6492 //ZZ insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
6493 //ZZ vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
6494 //ZZ *p++ = insn;
6495 //ZZ goto done;
6496 //ZZ }
6497 //ZZ case ARMin_Add32: {
6498 //ZZ UInt regD = iregNo(i->ARMin.Add32.rD);
6499 //ZZ UInt regN = iregNo(i->ARMin.Add32.rN);
6500 //ZZ UInt imm32 = i->ARMin.Add32.imm32;
6501 //ZZ vassert(regD != regN);
6502 //ZZ /* MOV regD, imm32 */
6503 //ZZ p = imm32_to_iregNo((UInt *)p, regD, imm32);
6504 //ZZ /* ADD regD, regN, regD */
6505 //ZZ UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
6506 //ZZ *p++ = insn;
6507 //ZZ goto done;
6508 //ZZ }
6509
6510 case ARM64in_EvCheck: {
6511 /* The sequence is fixed (canned) except for the two amodes
6512 supplied by the insn. These don't change the length, though.
6513 We generate:
6514 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
6515 subs w9, w9, #1
6516 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
6517 bpl nofail
6518 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
6519 br x9
6520 nofail:
6521 */
6522 UInt* p0 = p;
6523 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
6524 i->ARM64in.EvCheck.amCounter);
6525 *p++ = 0x71000529; /* subs w9, w9, #1 */
6526 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
6527 i->ARM64in.EvCheck.amCounter);
6528 *p++ = 0x54000065; /* bpl nofail */
6529 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
6530 i->ARM64in.EvCheck.amFailAddr);
6531 *p++ = 0xD61F0120; /* br x9 */
6532 /* nofail: */
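      /* That is 6 words (24 bytes) whenever each amode encodes in a
         single load/store, which the crosscheck below relies on.
         0x54000065 decodes as B.PL with imm19 == 3, i.e. a +12 byte
         skip over the failure-path ldr and br, landing at 'nofail'. */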
6533
6534 /* Crosscheck */
6535 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
6536 goto done;
6537 }
6538
6539 //ZZ case ARMin_ProfInc: {
6540 //ZZ /* We generate:
6541 //ZZ (ctrP is unknown now, so use 0x65556555 in the
6542 //ZZ expectation that a later call to LibVEX_patchProfCtr
6543 //ZZ will be used to fill in the immediate fields once the
6544 //ZZ right value is known.)
6545 //ZZ movw r12, lo16(0x65556555)
6546 //ZZ movt r12, lo16(0x65556555)
6547 //ZZ ldr r11, [r12]
6548 //ZZ adds r11, r11, #1
6549 //ZZ str r11, [r12]
6550 //ZZ ldr r11, [r12+4]
6551 //ZZ adc r11, r11, #0
6552 //ZZ str r11, [r12+4]
6553 //ZZ */
6554 //ZZ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
6555 //ZZ *p++ = 0xE59CB000;
6556 //ZZ *p++ = 0xE29BB001;
6557 //ZZ *p++ = 0xE58CB000;
6558 //ZZ *p++ = 0xE59CB004;
6559 //ZZ *p++ = 0xE2ABB000;
6560 //ZZ *p++ = 0xE58CB004;
6561 //ZZ /* Tell the caller .. */
6562 //ZZ vassert(!(*is_profInc));
6563 //ZZ *is_profInc = True;
6564 //ZZ goto done;
6565 //ZZ }
6566
6567 /* ... */
6568 default:
6569 goto bad;
6570 }
6571
6572 bad:
6573 ppARM64Instr(i);
6574 vpanic("emit_ARM64Instr");
6575 /*NOTREACHED*/
6576
6577 done:
6578 vassert(((UChar*)p) - &buf[0] <= 36);
6579 return ((UChar*)p) - &buf[0];
6580 }
6581
6582
6583 /* How big is an event check? See case for ARM64in_EvCheck in
6584 emit_ARM64Instr just above. That crosschecks what this returns, so
6585 we can tell if we're inconsistent. */
6586 Int evCheckSzB_ARM64 ( void )
6587 {
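   /* 6 fixed-size instructions of 4 bytes each in the canned
      ARM64in_EvCheck sequence above. */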
6588 return 24;
6589 }
6590
6591
6592 /* NB: what goes on here has to be very closely coordinated with the
6593 emitInstr case for XDirect, above. */
6594 VexInvalRange chainXDirect_ARM64 ( void* place_to_chain,
6595 void* disp_cp_chain_me_EXPECTED,
6596 void* place_to_jump_to )
6597 {
6598 /* What we're expecting to see is:
6599 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
6600         movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
6601 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
6602 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
6603 blr x9
6604 viz
6605 <16 bytes generated by imm64_to_iregNo_EXACTLY4>
6606 D6 3F 01 20
6607 */
6608 UInt* p = (UInt*)place_to_chain;
6609 vassert(0 == (3 & (HWord)p));
6610 vassert(is_imm64_to_iregNo_EXACTLY4(
6611 p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
6612 vassert(p[4] == 0xD63F0120);
6613
6614 /* And what we want to change it to is:
6615 movw x9, place_to_jump_to[15:0]
6616         movk x9, place_to_jump_to[31:16], lsl 16
6617 movk x9, place_to_jump_to[47:32], lsl 32
6618 movk x9, place_to_jump_to[63:48], lsl 48
6619 br x9
6620 viz
6621 <16 bytes generated by imm64_to_iregNo_EXACTLY4>
6622 D6 1F 01 20
6623
6624 The replacement has the same length as the original.
6625 */
6626 (void)imm64_to_iregNo_EXACTLY4(
6627 p, /*x*/9, Ptr_to_ULong(place_to_jump_to));
6628 p[4] = 0xD61F0120;
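   /* Note that BLR x9 (0xD63F0120) and BR x9 (0xD61F0120) differ only in
      bit 21 (0xD63F0120 ^ 0xD61F0120 == 0x00200000), so the rewrite
      touches the 16 immediate-materialising bytes plus this one word:
      20 bytes in all, matching the invalidation range returned below. */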
6629
6630 VexInvalRange vir = {(HWord)p, 20};
6631 return vir;
6632 }
6633
6634
6635 /* NB: what goes on here has to be very closely coordinated with the
6636 emitInstr case for XDirect, above. */
6637 VexInvalRange unchainXDirect_ARM64 ( void* place_to_unchain,
6638 void* place_to_jump_to_EXPECTED,
6639 void* disp_cp_chain_me )
6640 {
6641 /* What we're expecting to see is:
6642 movw x9, place_to_jump_to_EXPECTED[15:0]
6643         movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
6644 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
6645 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
6646 br x9
6647 viz
6648 <16 bytes generated by imm64_to_iregNo_EXACTLY4>
6649 D6 1F 01 20
6650 */
6651 UInt* p = (UInt*)place_to_unchain;
6652 vassert(0 == (3 & (HWord)p));
6653 vassert(is_imm64_to_iregNo_EXACTLY4(
6654 p, /*x*/9, Ptr_to_ULong(place_to_jump_to_EXPECTED)));
6655 vassert(p[4] == 0xD61F0120);
6656
6657 /* And what we want to change it to is:
6658 movw x9, disp_cp_chain_me_to[15:0]
6659         movk x9, disp_cp_chain_me_to[31:16], lsl 16
6660 movk x9, disp_cp_chain_me_to[47:32], lsl 32
6661 movk x9, disp_cp_chain_me_to[63:48], lsl 48
6662 blr x9
6663 viz
6664 <16 bytes generated by imm64_to_iregNo_EXACTLY4>
6665 D6 3F 01 20
6666 */
6667 (void)imm64_to_iregNo_EXACTLY4(
6668 p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me));
6669 p[4] = 0xD63F0120;
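   /* As with chaining above, the replacement occupies exactly the same
      20 bytes, so chain/unchain can be applied in place any number of
      times. */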
6670
6671 VexInvalRange vir = {(HWord)p, 20};
6672 return vir;
6673 }
6674
6675
6676 //ZZ /* Patch the counter address into a profile inc point, as previously
6677 //ZZ created by the ARMin_ProfInc case for emit_ARMInstr. */
6678 //ZZ VexInvalRange patchProfInc_ARM ( void* place_to_patch,
6679 //ZZ ULong* location_of_counter )
6680 //ZZ {
6681 //ZZ vassert(sizeof(ULong*) == 4);
6682 //ZZ UInt* p = (UInt*)place_to_patch;
6683 //ZZ vassert(0 == (3 & (HWord)p));
6684 //ZZ vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
6685 //ZZ vassert(p[2] == 0xE59CB000);
6686 //ZZ vassert(p[3] == 0xE29BB001);
6687 //ZZ vassert(p[4] == 0xE58CB000);
6688 //ZZ vassert(p[5] == 0xE59CB004);
6689 //ZZ vassert(p[6] == 0xE2ABB000);
6690 //ZZ vassert(p[7] == 0xE58CB004);
6691 //ZZ imm32_to_iregNo_EXACTLY2(p, /*r*/12,
6692 //ZZ (UInt)Ptr_to_ULong(location_of_counter));
6693 //ZZ VexInvalRange vir = {(HWord)p, 8};
6694 //ZZ return vir;
6695 //ZZ }
6696 //ZZ
6697 //ZZ
6698 //ZZ #undef BITS4
6699 //ZZ #undef X0000
6700 //ZZ #undef X0001
6701 //ZZ #undef X0010
6702 //ZZ #undef X0011
6703 //ZZ #undef X0100
6704 //ZZ #undef X0101
6705 //ZZ #undef X0110
6706 //ZZ #undef X0111
6707 //ZZ #undef X1000
6708 //ZZ #undef X1001
6709 //ZZ #undef X1010
6710 //ZZ #undef X1011
6711 //ZZ #undef X1100
6712 //ZZ #undef X1101
6713 //ZZ #undef X1110
6714 //ZZ #undef X1111
6715 //ZZ #undef XXXXX___
6716 //ZZ #undef XXXXXX__
6717 //ZZ #undef XXX___XX
6718 //ZZ #undef XXXXX__X
6719 //ZZ #undef XXXXXXXX
6720 //ZZ #undef XX______
6721
6722 /*---------------------------------------------------------------*/
6723 /*--- end host_arm64_defs.c ---*/
6724 /*---------------------------------------------------------------*/
6725