/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   NEON support is
   Copyright (C) 2010-2013 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
   and Kirill Batuzov <batuzovk@ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm_defs.h"

UInt arm_hwcaps = 0;


/* --------- Registers. --------- */

/* The usual HReg abstraction.
   There are 16 general purpose regs.
*/

void ppHRegARM ( HReg reg ) {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("r%d", r);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcFlt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("s%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM");
   }
}

HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32,  False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32,  False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32,  False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32,  False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32,  False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32,  False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32,  False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32,  False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32,  False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32,  False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32,  False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32,  False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32,  False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32,  False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32,  False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32,  False); }
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64,  False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64,  False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64,  False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64,  False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64,  False); }
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32,  False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32,  False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32,  False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32,  False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32,  False); }
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
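
/* Note on the constructors above: the third argument to mkHReg
   distinguishes virtual from real registers, so the 'False' here
   marks all of these as real (machine) registers; virtual regs are
   handled generically by ppHRegARM via hregIsVirtual. */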

void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
{
   Int i = 0;
   *nregs = 26;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   // callee-saved ones are listed first, since we prefer them
   // if they're available
   (*arr)[i++] = hregARM_R4();
   (*arr)[i++] = hregARM_R5();
   (*arr)[i++] = hregARM_R6();
   (*arr)[i++] = hregARM_R7();
   (*arr)[i++] = hregARM_R10();
   (*arr)[i++] = hregARM_R11();
   // otherwise we'll have to slum it out with caller-saved ones
   (*arr)[i++] = hregARM_R0();
   (*arr)[i++] = hregARM_R1();
   (*arr)[i++] = hregARM_R2();
   (*arr)[i++] = hregARM_R3();
   (*arr)[i++] = hregARM_R9();
   // FP registers.  Note: these are all callee-save.  Yay!
   // Hence we don't need to mention them as trashed in
   // getHRegUsage for ARMInstr_Call.
   (*arr)[i++] = hregARM_D8();
   (*arr)[i++] = hregARM_D9();
   (*arr)[i++] = hregARM_D10();
   (*arr)[i++] = hregARM_D11();
   (*arr)[i++] = hregARM_D12();
   (*arr)[i++] = hregARM_S26();
   (*arr)[i++] = hregARM_S27();
   (*arr)[i++] = hregARM_S28();
   (*arr)[i++] = hregARM_S29();
   (*arr)[i++] = hregARM_S30();

   (*arr)[i++] = hregARM_Q8();
   (*arr)[i++] = hregARM_Q9();
   (*arr)[i++] = hregARM_Q10();
   (*arr)[i++] = hregARM_Q11();
   (*arr)[i++] = hregARM_Q12();

   //(*arr)[i++] = hregARM_Q13();
   //(*arr)[i++] = hregARM_Q14();
   //(*arr)[i++] = hregARM_Q15();

   // unavail: r8 as GSP
   // r12 is used as a spill/reload temporary
   // r13 as SP
   // r14 as LR
   // r15 as PC
   //
   // All in all, we have 11 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
   // and r12 dedicated as a spill temporary.
   // 13 14 and 15 are not under the allocator's control.
   //
   // Hence for the allocatable registers we have:
   //
   // callee-saved: 4 5 6 7 (8) 9 10 11
   // caller-saved: 0 1 2 3
   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
   // designation for it, but the Linux instantiation of the ABI
   // specifies it as callee-saved.
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getHRegUsage for ARMInstr_Call too.
   vassert(i == *nregs);
}


/* --------- Condition codes, ARM encoding. --------- */

const HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
      case ARMcc_EQ: return "eq";
      case ARMcc_NE: return "ne";
      case ARMcc_HS: return "hs";
      case ARMcc_LO: return "lo";
      case ARMcc_MI: return "mi";
      case ARMcc_PL: return "pl";
      case ARMcc_VS: return "vs";
      case ARMcc_VC: return "vc";
      case ARMcc_HI: return "hi";
      case ARMcc_LS: return "ls";
      case ARMcc_GE: return "ge";
      case ARMcc_LT: return "lt";
      case ARMcc_GT: return "gt";
      case ARMcc_LE: return "le";
      case ARMcc_AL: return "al"; // default
      case ARMcc_NV: return "nv";
      default: vpanic("showARMCondCode");
   }
}


/* --------- Mem AModes: Addressing Mode 1 --------- */

ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RI;
   am->ARMam1.RI.reg    = reg;
   am->ARMam1.RI.simm13 = simm13;
   vassert(-4095 <= simm13 && simm13 <= 4095);
   return am;
}
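/* The "simm13" name reflects the encoding: a 12-bit offset magnitude
   plus an add/subtract direction bit, hence the -4095 .. 4095 range
   asserted above. */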
ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RRS;
   am->ARMam1.RRS.base  = base;
   am->ARMam1.RRS.index = index;
   am->ARMam1.RRS.shift = shift;
   vassert(0 <= shift && shift <= 3);
   return am;
}

void ppARMAMode1 ( ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         vex_printf("%d(", am->ARMam1.RI.simm13);
         ppHRegARM(am->ARMam1.RI.reg);
         vex_printf(")");
         break;
      case ARMam1_RRS:
         vex_printf("(");
         ppHRegARM(am->ARMam1.RRS.base);
         vex_printf(",");
         ppHRegARM(am->ARMam1.RRS.index);
         vex_printf(",%u)", am->ARMam1.RRS.shift);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
         // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
         // return;
      default:
         vpanic("addRegUsage_ARMAmode1");
   }
}

static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         //am->ARMam1.RR.base = lookupHRegRemap(m, am->ARMam1.RR.base);
         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode1");
   }
}


/* --------- Mem AModes: Addressing Mode 2 --------- */

ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RI;
   am->ARMam2.RI.reg   = reg;
   am->ARMam2.RI.simm9 = simm9;
   vassert(-255 <= simm9 && simm9 <= 255);
   return am;
}
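/* Likewise "simm9": an 8-bit offset magnitude plus an add/subtract
   direction bit, hence -255 .. 255.  This is the addressing mode used
   by the halfword and signed-byte loads/stores below. */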
ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RR;
   am->ARMam2.RR.base  = base;
   am->ARMam2.RR.index = index;
   return am;
}

void ppARMAMode2 ( ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         vex_printf("%d(", am->ARMam2.RI.simm9);
         ppHRegARM(am->ARMam2.RI.reg);
         vex_printf(")");
         break;
      case ARMam2_RR:
         vex_printf("(");
         ppHRegARM(am->ARMam2.RR.base);
         vex_printf(",");
         ppHRegARM(am->ARMam2.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
         // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
         // return;
      default:
         vpanic("addRegUsage_ARMAmode2");
   }
}

static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         //am->ARMam2.RR.base = lookupHRegRemap(m, am->ARMam2.RR.base);
         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode2");
   }
}


/* --------- Mem AModes: Addressing Mode VFP --------- */

ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
   ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
   vassert(simm11 >= -1020 && simm11 <= 1020);
   vassert(0 == (simm11 & 3));
   am->reg    = reg;
   am->simm11 = simm11;
   return am;
}
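/* "simm11": VFP loads/stores encode the offset as an 8-bit word count
   plus a direction bit, so the byte offset must be a multiple of 4 in
   -1020 .. 1020, exactly as the two assertions above require. */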

void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}


/* --------- Mem AModes: Addressing Mode Neon ------- */

ARMAModeN* mkARMAModeN_RR ( HReg rN, HReg rM ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_RR;
   am->ARMamN.RR.rN = rN;
   am->ARMamN.RR.rM = rM;
   return am;
}

ARMAModeN* mkARMAModeN_R ( HReg rN ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_R;
   am->ARMamN.R.rN = rN;
   return am;
}

static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
   } else {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
   }
}

static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
   } else {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
   }
}

void ppARMAModeN ( ARMAModeN* am ) {
   vex_printf("[");
   if (am->tag == ARMamN_R) {
      ppHRegARM(am->ARMamN.R.rN);
   } else {
      ppHRegARM(am->ARMamN.RR.rN);
   }
   vex_printf("]");
   if (am->tag == ARMamN_RR) {
      vex_printf(", ");
      ppHRegARM(am->ARMamN.RR.rM);
   }
}


/* --------- Reg or imm-8x4 operands --------- */

static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}
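
/* A worked example of the imm-8x4 scheme that follows: an operand
   denotes an 8-bit value rotated right by twice the 4-bit field.  So
   imm8=0xFF, imm4=4 denotes ROR32(0xFF, 8) == 0xFF000000, and imm8=1,
   imm4=10 denotes ROR32(1, 20) == 0x1000. */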

ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
   ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag              = ARMri84_I84;
   ri84->ARMri84.I84.imm8 = imm8;
   ri84->ARMri84.I84.imm4 = imm4;
   vassert(imm8 >= 0 && imm8 <= 255);
   vassert(imm4 >= 0 && imm4 <= 15);
   return ri84;
}

ARMRI84* ARMRI84_R ( HReg reg ) {
   ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag           = ARMri84_R;
   ri84->ARMri84.R.reg = reg;
   return ri84;
}

void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI84");
   }
}

static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI84");
   }
}


/* --------- Reg or imm5 operands --------- */

ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
   ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag            = ARMri5_I5;
   ri5->ARMri5.I5.imm5 = imm5;
   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
   return ri5;
}

ARMRI5* ARMRI5_R ( HReg reg ) {
   ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag          = ARMri5_R;
   ri5->ARMri5.R.reg = reg;
   return ri5;
}

void ppARMRI5 ( ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         vex_printf("%u", ri5->ARMri5.I5.imm5);
         break;
      case ARMri5_R:
         ppHRegARM(ri5->ARMri5.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI5");
   }
}

static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI5");
   }
}

/* -------- Neon Immediate operand --------- */

ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
   ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
   i->type = type;
   i->imm8 = imm8;
   return i;
}

ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      case 3:
         x = x << 8; /* fallthrough */
      case 2:
         x = x << 8; /* fallthrough */
      case 1:
         x = x << 8; /* fallthrough */
      case 0:
         return (x << 32) | x;
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
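
/* Two worked examples of the expansion, hand-checked against the code
   above: type 9 replicates each bit of imm8 into a whole byte, so
   imm8=0xA5 gives 0xFF00FF0000FF00FF; type 10 expands imm8 as a
   VFP-style 8-bit float, so imm8=0x70 gives 0x3F8000003F800000,
   i.e. 1.0f in both 32-bit halves. */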

ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >> 8)  & 1) << 1)
               | (((x >> 0)  & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64(&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
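
/* A round-trip sketch: Imm64_to_ARMNImm(0x00FF00FF00FF00FF) yields
   type 4 with imm8=0xFF, and ARMNImm_to_Imm64 maps that back to the
   same value.  Constants not expressible in any of the eleven forms
   yield NULL. */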

void ppARMNImm ( ARMNImm* i ) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}

/* -- Register or scalar operand --- */

ARMNRS* mkARMNRS ( ARMNRS_tag tag, HReg reg, UInt index )
{
   ARMNRS* p = LibVEX_Alloc(sizeof(ARMNRS));
   p->tag = tag;
   p->reg = reg;
   p->index = index;
   return p;
}

void ppARMNRS ( ARMNRS* p )
{
   ppHRegARM(p->reg);
   if (p->tag == ARMNRS_Scalar) {
      vex_printf("[%d]", p->index);
   }
}

/* --------- Instructions. --------- */

const HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}

const HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}

const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}

const HChar* showARMMulOp ( ARMMulOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}

const HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}

const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}

const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:     return "vand";
      case ARMneon_VORR:     return "vorr";
      case ARMneon_VXOR:     return "veor";
      case ARMneon_VADD:     return "vadd";
      case ARMneon_VRHADDS:  return "vrhadd";
      case ARMneon_VRHADDU:  return "vrhadd";
      case ARMneon_VADDFP:   return "vadd";
      case ARMneon_VPADDFP:  return "vpadd";
      case ARMneon_VABDFP:   return "vabd";
      case ARMneon_VSUB:     return "vsub";
      case ARMneon_VSUBFP:   return "vsub";
      case ARMneon_VMINU:    return "vmin";
      case ARMneon_VMINS:    return "vmin";
      case ARMneon_VMINF:    return "vmin";
      case ARMneon_VMAXU:    return "vmax";
      case ARMneon_VMAXS:    return "vmax";
      case ARMneon_VMAXF:    return "vmax";
      case ARMneon_VQADDU:   return "vqadd";
      case ARMneon_VQADDS:   return "vqadd";
      case ARMneon_VQSUBU:   return "vqsub";
      case ARMneon_VQSUBS:   return "vqsub";
      case ARMneon_VCGTU:    return "vcgt";
      case ARMneon_VCGTS:    return "vcgt";
      case ARMneon_VCGTF:    return "vcgt";
      case ARMneon_VCGEF:    return "vcge";
      case ARMneon_VCGEU:    return "vcge";
      case ARMneon_VCGES:    return "vcge";
      case ARMneon_VCEQ:     return "vceq";
      case ARMneon_VCEQF:    return "vceq";
      case ARMneon_VPADD:    return "vpadd";
      case ARMneon_VPMINU:   return "vpmin";
      case ARMneon_VPMINS:   return "vpmin";
      case ARMneon_VPMINF:   return "vpmin";
      case ARMneon_VPMAXU:   return "vpmax";
      case ARMneon_VPMAXS:   return "vpmax";
      case ARMneon_VPMAXF:   return "vpmax";
      case ARMneon_VEXT:     return "vext";
      case ARMneon_VMUL:     return "vmuli";
      case ARMneon_VMULLU:   return "vmull";
      case ARMneon_VMULLS:   return "vmull";
      case ARMneon_VMULP:    return "vmul";
      case ARMneon_VMULFP:   return "vmul";
      case ARMneon_VMULLP:   return "vmull";
      case ARMneon_VQDMULH:  return "vqdmulh";
      case ARMneon_VQRDMULH: return "vqrdmulh";
      case ARMneon_VQDMULL:  return "vqdmull";
      case ARMneon_VTBL:     return "vtbl";
      case ARMneon_VRECPS:   return "vrecps";
      case ARMneon_VRSQRTS:  return "vrsqrts";
      case ARMneon_INVALID:  return "??invalid??";
      /* ... */
      default: vpanic("showARMNeonBinOp");
   }
}

const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}

const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:          return "vmov";
      case ARMneon_COPYLS:        return "vmov";
      case ARMneon_COPYLU:        return "vmov";
      case ARMneon_COPYN:         return "vmov";
      case ARMneon_COPYQNSS:      return "vqmovn";
      case ARMneon_COPYQNUS:      return "vqmovun";
      case ARMneon_COPYQNUU:      return "vqmovn";
      case ARMneon_NOT:           return "vmvn";
      case ARMneon_EQZ:           return "vceq";
      case ARMneon_CNT:           return "vcnt";
      case ARMneon_CLS:           return "vcls";
      case ARMneon_CLZ:           return "vclz";
      case ARMneon_DUP:           return "vdup";
      case ARMneon_PADDLS:        return "vpaddl";
      case ARMneon_PADDLU:        return "vpaddl";
      case ARMneon_VQSHLNSS:      return "vqshl";
      case ARMneon_VQSHLNUU:      return "vqshl";
      case ARMneon_VQSHLNUS:      return "vqshlu";
      case ARMneon_REV16:         return "vrev16";
      case ARMneon_REV32:         return "vrev32";
      case ARMneon_REV64:         return "vrev64";
      case ARMneon_VCVTFtoU:      return "vcvt";
      case ARMneon_VCVTFtoS:      return "vcvt";
      case ARMneon_VCVTUtoF:      return "vcvt";
      case ARMneon_VCVTStoF:      return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16:  return "vcvt";
      case ARMneon_VCVTF16toF32:  return "vcvt";
      case ARMneon_VRECIP:        return "vrecip";
      case ARMneon_VRECIPF:       return "vrecipf";
      case ARMneon_VNEGF:         return "vneg";
      case ARMneon_ABS:           return "vabs";
      case ARMneon_VABSFP:        return "vabsfp";
      case ARMneon_VRSQRTEFP:     return "vrsqrtefp";
      case ARMneon_VRSQRTE:       return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}

const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU:      return ".u32.f32";
      case ARMneon_VCVTFtoS:      return ".s32.f32";
      case ARMneon_VCVTUtoF:      return ".f32.u32";
      case ARMneon_VCVTStoF:      return ".f32.s32";
      case ARMneon_VCVTF16toF32:  return ".f32.f16";
      case ARMneon_VCVTF32toF16:  return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}

const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM:  return "vmov";
      case ARMneon_GETELEMU: return "vmov";
      case ARMneon_GETELEMS: return "vmov";
      case ARMneon_VDUP:     return "vdup";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM:
      case ARMneon_VDUP:
         return ".i";
      case ARMneon_GETELEMS:
         return ".s";
      case ARMneon_GETELEMU:
         return ".u";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL:  return "vshl";
      case ARMneon_VSAL:  return "vshl";
      case ARMneon_VQSHL: return "vqshl";
      case ARMneon_VQSAL: return "vqshl";
      /* ... */
      default: vpanic("showARMNeonShiftOp");
   }
}

const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL:
      case ARMneon_VQSHL:
         return ".u";
      case ARMneon_VSAL:
      case ARMneon_VQSAL:
         return ".s";
      /* ... */
      default: vpanic("showARMNeonShiftOpDataType");
   }
}

const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN: return "vtrn";
      case ARMneon_ZIP: return "vzip";
      case ARMneon_UZP: return "vuzp";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN:
      case ARMneon_ZIP:
      case ARMneon_UZP:
         return "i";
      /* ... */
      default: vpanic("showARMNeonDualOpDataType");
   }
}

static const HChar* showARMNeonDataSize_wrk ( UInt size )
{
   switch (size) {
      case 0: return "8";
      case 1: return "16";
      case 2: return "32";
      case 3: return "64";
      default: vpanic("showARMNeonDataSize");
   }
}

static const HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16 ||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32 ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}
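
/* For the VQSHLN cases above, the 'size' operand appears to pack the
   lane width as the highest set bit (0x40 -> 64-bit lanes, 0x20 -> 32,
   0x10 -> 16, 0x08 -> 8), with the shift amount carried in the bits
   below it, mirroring the imm6-style field of the underlying
   instruction encoding. */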

ARMInstr* ARMInstr_Alu ( ARMAluOp op,
                         HReg dst, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Alu;
   i->ARMin.Alu.op = op;
   i->ARMin.Alu.dst = dst;
   i->ARMin.Alu.argL = argL;
   i->ARMin.Alu.argR = argR;
   return i;
}

ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
                           HReg dst, HReg argL, ARMRI5* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Shift;
   i->ARMin.Shift.op = op;
   i->ARMin.Shift.dst = dst;
   i->ARMin.Shift.argL = argL;
   i->ARMin.Shift.argR = argR;
   return i;
}

ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Unary;
   i->ARMin.Unary.op = op;
   i->ARMin.Unary.dst = dst;
   i->ARMin.Unary.src = src;
   return i;
}

ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_CmpOrTst;
   i->ARMin.CmpOrTst.isCmp = isCmp;
   i->ARMin.CmpOrTst.argL = argL;
   i->ARMin.CmpOrTst.argR = argR;
   return i;
}

ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Mov;
   i->ARMin.Mov.dst = dst;
   i->ARMin.Mov.src = src;
   return i;
}

ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Imm32;
   i->ARMin.Imm32.dst = dst;
   i->ARMin.Imm32.imm32 = imm32;
   return i;
}

ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_LdSt32;
   i->ARMin.LdSt32.cc = cc;
   i->ARMin.LdSt32.isLoad = isLoad;
   i->ARMin.LdSt32.rD = rD;
   i->ARMin.LdSt32.amode = amode;
   vassert(cc != ARMcc_NV);
   return i;
}

ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
                            Bool isLoad, Bool signedLoad,
                            HReg rD, ARMAMode2* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_LdSt16;
   i->ARMin.LdSt16.cc = cc;
   i->ARMin.LdSt16.isLoad = isLoad;
   i->ARMin.LdSt16.signedLoad = signedLoad;
   i->ARMin.LdSt16.rD = rD;
   i->ARMin.LdSt16.amode = amode;
   vassert(cc != ARMcc_NV);
   return i;
}

ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_LdSt8U;
   i->ARMin.LdSt8U.cc = cc;
   i->ARMin.LdSt8U.isLoad = isLoad;
   i->ARMin.LdSt8U.rD = rD;
   i->ARMin.LdSt8U.amode = amode;
   vassert(cc != ARMcc_NV);
   return i;
}

ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Ld8S;
   i->ARMin.Ld8S.cc = cc;
   i->ARMin.Ld8S.rD = rD;
   i->ARMin.Ld8S.amode = amode;
   vassert(cc != ARMcc_NV);
   return i;
}

ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
                             ARMCondCode cond, Bool toFastEP ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_XDirect;
   i->ARMin.XDirect.dstGA = dstGA;
   i->ARMin.XDirect.amR15T = amR15T;
   i->ARMin.XDirect.cond = cond;
   i->ARMin.XDirect.toFastEP = toFastEP;
   return i;
}

ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
                            ARMCondCode cond ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_XIndir;
   i->ARMin.XIndir.dstGA = dstGA;
   i->ARMin.XIndir.amR15T = amR15T;
   i->ARMin.XIndir.cond = cond;
   return i;
}

ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
                               ARMCondCode cond, IRJumpKind jk ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_XAssisted;
   i->ARMin.XAssisted.dstGA = dstGA;
   i->ARMin.XAssisted.amR15T = amR15T;
   i->ARMin.XAssisted.cond = cond;
   i->ARMin.XAssisted.jk = jk;
   return i;
}

ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_CMov;
   i->ARMin.CMov.cond = cond;
   i->ARMin.CMov.dst = dst;
   i->ARMin.CMov.src = src;
   vassert(cond != ARMcc_AL);
   return i;
}

ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs,
                          RetLoc rloc ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Call;
   i->ARMin.Call.cond = cond;
   i->ARMin.Call.target = target;
   i->ARMin.Call.nArgRegs = nArgRegs;
   i->ARMin.Call.rloc = rloc;
   vassert(is_sane_RetLoc(rloc));
   return i;
}

ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_Mul;
   i->ARMin.Mul.op = op;
   return i;
}

ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_LdrEX;
   i->ARMin.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}

ARMInstr* ARMInstr_StrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_StrEX;
   i->ARMin.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}

ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VLdStD;
   i->ARMin.VLdStD.isLoad = isLoad;
   i->ARMin.VLdStD.dD = dD;
   i->ARMin.VLdStD.amode = am;
   return i;
}

ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VLdStS;
   i->ARMin.VLdStS.isLoad = isLoad;
   i->ARMin.VLdStS.fD = fD;
   i->ARMin.VLdStS.amode = am;
   return i;
}

ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VAluD;
   i->ARMin.VAluD.op = op;
   i->ARMin.VAluD.dst = dst;
   i->ARMin.VAluD.argL = argL;
   i->ARMin.VAluD.argR = argR;
   return i;
}

ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VAluS;
   i->ARMin.VAluS.op = op;
   i->ARMin.VAluS.dst = dst;
   i->ARMin.VAluS.argL = argL;
   i->ARMin.VAluS.argR = argR;
   return i;
}

ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VUnaryD;
   i->ARMin.VUnaryD.op = op;
   i->ARMin.VUnaryD.dst = dst;
   i->ARMin.VUnaryD.src = src;
   return i;
}

ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VUnaryS;
   i->ARMin.VUnaryS.op = op;
   i->ARMin.VUnaryS.dst = dst;
   i->ARMin.VUnaryS.src = src;
   return i;
}

ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VCmpD;
   i->ARMin.VCmpD.argL = argL;
   i->ARMin.VCmpD.argR = argR;
   return i;
}

ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VCMovD;
   i->ARMin.VCMovD.cond = cond;
   i->ARMin.VCMovD.dst = dst;
   i->ARMin.VCMovD.src = src;
   vassert(cond != ARMcc_AL);
   return i;
}

ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VCMovS;
   i->ARMin.VCMovS.cond = cond;
   i->ARMin.VCMovS.dst = dst;
   i->ARMin.VCMovS.src = src;
   vassert(cond != ARMcc_AL);
   return i;
}

ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VCvtSD;
   i->ARMin.VCvtSD.sToD = sToD;
   i->ARMin.VCvtSD.dst = dst;
   i->ARMin.VCvtSD.src = src;
   return i;
}

ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VXferD;
   i->ARMin.VXferD.toD = toD;
   i->ARMin.VXferD.dD = dD;
   i->ARMin.VXferD.rHi = rHi;
   i->ARMin.VXferD.rLo = rLo;
   return i;
}

ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VXferS;
   i->ARMin.VXferS.toS = toS;
   i->ARMin.VXferS.fD = fD;
   i->ARMin.VXferS.rLo = rLo;
   return i;
}

ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
                            HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_VCvtID;
   i->ARMin.VCvtID.iToD = iToD;
   i->ARMin.VCvtID.syned = syned;
   i->ARMin.VCvtID.dst = dst;
   i->ARMin.VCvtID.src = src;
   return i;
}

ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_FPSCR;
   i->ARMin.FPSCR.toFPSCR = toFPSCR;
   i->ARMin.FPSCR.iReg = iReg;
   return i;
}

ARMInstr* ARMInstr_MFence ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_MFence;
   return i;
}

ARMInstr* ARMInstr_CLREX ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_CLREX;
   return i;
}

ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NLdStQ;
   i->ARMin.NLdStQ.isLoad = isLoad;
   i->ARMin.NLdStQ.dQ = dQ;
   i->ARMin.NLdStQ.amode = amode;
   return i;
}

ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NLdStD;
   i->ARMin.NLdStD.isLoad = isLoad;
   i->ARMin.NLdStD.dD = dD;
   i->ARMin.NLdStD.amode = amode;
   return i;
}

ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NUnary;
   i->ARMin.NUnary.op = op;
   i->ARMin.NUnary.src = nQ;
   i->ARMin.NUnary.dst = dQ;
   i->ARMin.NUnary.size = size;
   i->ARMin.NUnary.Q = Q;
   return i;
}

ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NUnaryS;
   i->ARMin.NUnaryS.op = op;
   i->ARMin.NUnaryS.src = src;
   i->ARMin.NUnaryS.dst = dst;
   i->ARMin.NUnaryS.size = size;
   i->ARMin.NUnaryS.Q = Q;
   return i;
}

ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
                           UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NDual;
   i->ARMin.NDual.op = op;
   i->ARMin.NDual.arg1 = nQ;
   i->ARMin.NDual.arg2 = mQ;
   i->ARMin.NDual.size = size;
   i->ARMin.NDual.Q = Q;
   return i;
}

ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
                             HReg dst, HReg argL, HReg argR,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NBinary;
   i->ARMin.NBinary.op = op;
   i->ARMin.NBinary.argL = argL;
   i->ARMin.NBinary.argR = argR;
   i->ARMin.NBinary.dst = dst;
   i->ARMin.NBinary.size = size;
   i->ARMin.NBinary.Q = Q;
   return i;
}

ARMInstr* ARMInstr_NeonImm ( HReg dst, ARMNImm* imm ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NeonImm;
   i->ARMin.NeonImm.dst = dst;
   i->ARMin.NeonImm.imm = imm;
   return i;
}

ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NCMovQ;
   i->ARMin.NCMovQ.cond = cond;
   i->ARMin.NCMovQ.dst = dst;
   i->ARMin.NCMovQ.src = src;
   vassert(cond != ARMcc_AL);
   return i;
}

ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
                            HReg dst, HReg argL, HReg argR,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NShift;
   i->ARMin.NShift.op = op;
   i->ARMin.NShift.argL = argL;
   i->ARMin.NShift.argR = argR;
   i->ARMin.NShift.dst = dst;
   i->ARMin.NShift.size = size;
   i->ARMin.NShift.Q = Q;
   return i;
}

ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
{
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_NShl64;
   i->ARMin.NShl64.dst = dst;
   i->ARMin.NShl64.src = src;
   i->ARMin.NShl64.amt = amt;
   vassert(amt >= 1 && amt <= 63);
   return i;
}

/* Helper copy-pasted from isel.c */
static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      u = ROR32(u, 30);
   }
   vassert(i == 16);
   return False;
}
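
/* A sketch of fitsIn8x4 in action: each loop iteration rotates u left
   by 2, so fitsIn8x4(&u8, &u4, 0x1000) succeeds with u8=1, u4=10,
   since ROR32(1, 2*10) == 0x1000.  ARMInstr_Add32 below relies on this
   to fold such immediates into a single ADD. */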

ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   UInt u8, u4;
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   /* Try to generate single ADD if possible */
   if (fitsIn8x4(&u8, &u4, imm32)) {
      i->tag = ARMin_Alu;
      i->ARMin.Alu.op = ARMalu_ADD;
      i->ARMin.Alu.dst = rD;
      i->ARMin.Alu.argL = rN;
      i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   } else {
      i->tag = ARMin_Add32;
      i->ARMin.Add32.rD = rD;
      i->ARMin.Add32.rN = rN;
      i->ARMin.Add32.imm32 = imm32;
   }
   return i;
}

ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
                             ARMAMode1* amFailAddr ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_EvCheck;
   i->ARMin.EvCheck.amCounter = amCounter;
   i->ARMin.EvCheck.amFailAddr = amFailAddr;
   return i;
}

ARMInstr* ARMInstr_ProfInc ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag = ARMin_ProfInc;
   return i;
}

/* ... */

void ppARMInstr ( ARMInstr* i ) {
   switch (i->tag) {
      case ARMin_Alu:
         vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
         ppHRegARM(i->ARMin.Alu.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Alu.argL);
         vex_printf(", ");
         ppARMRI84(i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
         ppHRegARM(i->ARMin.Shift.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Shift.argL);
         vex_printf(", ");
         ppARMRI5(i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
         ppHRegARM(i->ARMin.Unary.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
         ppHRegARM(i->ARMin.CmpOrTst.argL);
         vex_printf(", ");
         ppARMRI84(i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         vex_printf("mov ");
         ppHRegARM(i->ARMin.Mov.dst);
         vex_printf(", ");
         ppARMRI84(i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         vex_printf("imm ");
         ppHRegARM(i->ARMin.Imm32.dst);
         vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
         return;
      case ARMin_LdSt32:
         if (i->ARMin.LdSt32.isLoad) {
            vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
                                    : showARMCondCode(i->ARMin.LdSt32.cc));
            ppHRegARM(i->ARMin.LdSt32.rD);
            vex_printf(", ");
            ppARMAMode1(i->ARMin.LdSt32.amode);
         } else {
            vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
                                    : showARMCondCode(i->ARMin.LdSt32.cc));
            ppARMAMode1(i->ARMin.LdSt32.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         if (i->ARMin.LdSt16.isLoad) {
            vex_printf("%s%s%s",
                       i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
                       i->ARMin.LdSt16.cc == ARMcc_AL ? " "
                          : showARMCondCode(i->ARMin.LdSt16.cc),
                       i->ARMin.LdSt16.signedLoad
                          ? " " : "  " /* pad "ldrh" to match "ldrsh" */);
            ppHRegARM(i->ARMin.LdSt16.rD);
            vex_printf(", ");
            ppARMAMode2(i->ARMin.LdSt16.amode);
         } else {
            vex_printf("strh%s ",
                       i->ARMin.LdSt16.cc == ARMcc_AL ? " "
                          : showARMCondCode(i->ARMin.LdSt16.cc));
            ppARMAMode2(i->ARMin.LdSt16.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         if (i->ARMin.LdSt8U.isLoad) {
            vex_printf("ldrb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
                                     : showARMCondCode(i->ARMin.LdSt8U.cc));
            ppHRegARM(i->ARMin.LdSt8U.rD);
            vex_printf(", ");
            ppARMAMode1(i->ARMin.LdSt8U.amode);
         } else {
            vex_printf("strb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
                                     : showARMCondCode(i->ARMin.LdSt8U.cc));
            ppARMAMode1(i->ARMin.LdSt8U.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? " "
                                   : showARMCondCode(i->ARMin.Ld8S.cc));
         ppARMAMode2(i->ARMin.Ld8S.amode);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Ld8S.rD);
         return;
      case ARMin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XDirect.cond));
         vex_printf("movw r12,0x%x; ",
                    (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
         vex_printf("movt r12,0x%x; ",
                    (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
         vex_printf("str r12,");
         ppARMAMode1(i->ARMin.XDirect.amR15T);
         vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("blx r12 }");
         return;
      case ARMin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XIndir.cond));
         vex_printf("str ");
         ppHRegARM(i->ARMin.XIndir.dstGA);
         vex_printf(",");
         ppARMAMode1(i->ARMin.XIndir.amR15T);
         vex_printf("; movw r12,LO16($disp_cp_xindir); ");
         vex_printf("movt r12,HI16($disp_cp_xindir); ");
         vex_printf("blx r12 }");
         return;
      case ARMin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XAssisted.cond));
         vex_printf("str ");
         ppHRegARM(i->ARMin.XAssisted.dstGA);
         vex_printf(",");
         ppARMAMode1(i->ARMin.XAssisted.amR15T);
         vex_printf("; movw r8,$IRJumpKind_to_TRCVAL(%d); ",
                    (Int)i->ARMin.XAssisted.jk);
         vex_printf("movw r12,LO16($disp_cp_xassisted); ");
         vex_printf("movt r12,HI16($disp_cp_xassisted); ");
         vex_printf("blx r12 }");
         return;
      case ARMin_CMov:
         vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
         ppHRegARM(i->ARMin.CMov.dst);
         vex_printf(", ");
         ppARMRI84(i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         vex_printf("call%s ",
                    i->ARMin.Call.cond == ARMcc_AL
                       ? "" : showARMCondCode(i->ARMin.Call.cond));
         vex_printf("0x%lx [nArgRegs=%d, ",
                    i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
         ppRetLoc(i->ARMin.Call.rloc);
         vex_printf("]");
         return;
      case ARMin_Mul:
         vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
         if (i->ARMin.Mul.op == ARMmul_PLAIN) {
            vex_printf("r0, r2, r3");
         } else {
            vex_printf("r1:r0, r2, r3");
         }
         return;
      case ARMin_LdrEX: {
         const HChar* sz = "";
         switch (i->ARMin.LdrEX.szB) {
            case 1: sz = "b"; break; case 2: sz = "h"; break;
            case 8: sz = "d"; break; case 4: break;
            default: vassert(0);
         }
         vex_printf("ldrex%s %sr2, [r4]",
                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
         return;
      }
      case ARMin_StrEX: {
         const HChar* sz = "";
         switch (i->ARMin.StrEX.szB) {
            case 1: sz = "b"; break; case 2: sz = "h"; break;
            case 8: sz = "d"; break; case 4: break;
            default: vassert(0);
         }
         vex_printf("strex%s r0, %sr2, [r4]",
                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
         return;
      }
      case ARMin_VLdStD:
         if (i->ARMin.VLdStD.isLoad) {
            vex_printf("fldd ");
            ppHRegARM(i->ARMin.VLdStD.dD);
            vex_printf(", ");
            ppARMAModeV(i->ARMin.VLdStD.amode);
         } else {
            vex_printf("fstd ");
            ppARMAModeV(i->ARMin.VLdStD.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         if (i->ARMin.VLdStS.isLoad) {
            vex_printf("flds ");
            ppHRegARM(i->ARMin.VLdStS.fD);
            vex_printf(", ");
            ppARMAModeV(i->ARMin.VLdStS.amode);
         } else {
            vex_printf("fsts ");
            ppARMAModeV(i->ARMin.VLdStS.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
         ppHRegARM(i->ARMin.VAluD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluD.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
         ppHRegARM(i->ARMin.VAluS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluS.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
         ppHRegARM(i->ARMin.VUnaryD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
         ppHRegARM(i->ARMin.VUnaryS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         vex_printf("fcmpd ");
         ppHRegARM(i->ARMin.VCmpD.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCmpD.argR);
         vex_printf(" ; fmstat");
         return;
      case ARMin_VCMovD:
         vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
         ppHRegARM(i->ARMin.VCMovD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
         ppHRegARM(i->ARMin.VCMovS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
         ppHRegARM(i->ARMin.VCvtSD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         vex_printf("vmov ");
         if (i->ARMin.VXferD.toD) {
            ppHRegARM(i->ARMin.VXferD.dD);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rHi);
         } else {
            ppHRegARM(i->ARMin.VXferD.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rHi);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.dD);
         }
         return;
      case ARMin_VXferS:
         vex_printf("vmov ");
         if (i->ARMin.VXferS.toS) {
            ppHRegARM(i->ARMin.VXferS.fD);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferS.rLo);
         } else {
            ppHRegARM(i->ARMin.VXferS.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferS.fD);
1843 }
1844 return;
1845 case ARMin_VCvtID: {
1846 const HChar* nm = "?";
1847 if (i->ARMin.VCvtID.iToD) {
1848 nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1849 } else {
1850 nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1851 }
1852 vex_printf("%s ", nm);
1853 ppHRegARM(i->ARMin.VCvtID.dst);
1854 vex_printf(", ");
1855 ppHRegARM(i->ARMin.VCvtID.src);
1856 return;
1857 }
1858 case ARMin_FPSCR:
1859 if (i->ARMin.FPSCR.toFPSCR) {
1860 vex_printf("fmxr fpscr, ");
1861 ppHRegARM(i->ARMin.FPSCR.iReg);
1862 } else {
1863 vex_printf("fmrx ");
1864 ppHRegARM(i->ARMin.FPSCR.iReg);
1865 vex_printf(", fpscr");
1866 }
1867 return;
1868 case ARMin_MFence:
1869 vex_printf("(mfence) dsb sy; dmb sy; isb");
1870 return;
1871 case ARMin_CLREX:
1872 vex_printf("clrex");
1873 return;
1874 case ARMin_NLdStQ:
1875 if (i->ARMin.NLdStQ.isLoad)
1876 vex_printf("vld1.32 {");
1877 else
1878 vex_printf("vst1.32 {");
1879 ppHRegARM(i->ARMin.NLdStQ.dQ);
1880 vex_printf("} ");
1881 ppARMAModeN(i->ARMin.NLdStQ.amode);
1882 return;
1883 case ARMin_NLdStD:
1884 if (i->ARMin.NLdStD.isLoad)
1885 vex_printf("vld1.32 {");
1886 else
1887 vex_printf("vst1.32 {");
1888 ppHRegARM(i->ARMin.NLdStD.dD);
1889 vex_printf("} ");
1890 ppARMAModeN(i->ARMin.NLdStD.amode);
1891 return;
1892 case ARMin_NUnary:
1893 vex_printf("%s%s%s ",
1894 showARMNeonUnOp(i->ARMin.NUnary.op),
1895 showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1896 showARMNeonDataSize(i));
1897 ppHRegARM(i->ARMin.NUnary.dst);
1898 vex_printf(", ");
1899 ppHRegARM(i->ARMin.NUnary.src);
1900 if (i->ARMin.NUnary.op == ARMneon_EQZ)
1901 vex_printf(", #0");
1902 if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1903 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1904 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1905 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1906 vex_printf(", #%d", i->ARMin.NUnary.size);
1907 }
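         /* Editor's note: for the VQSHLN* cases below, the size
            field packs both the lane width (highest flag bit set:
            0x40 -> 64-bit, 0x20 -> 32-bit, 0x10 -> 16-bit, 0x08 ->
            8-bit lanes) and the shift amount in the bits beneath
            that flag, hence the subtractions when printing the
            immediate. */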
1908 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1909 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1910 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1911 UInt size;
1912 size = i->ARMin.NUnary.size;
1913 if (size & 0x40) {
1914 vex_printf(", #%d", size - 64);
1915 } else if (size & 0x20) {
1916 vex_printf(", #%d", size - 32);
1917 } else if (size & 0x10) {
1918 vex_printf(", #%d", size - 16);
1919 } else if (size & 0x08) {
1920 vex_printf(", #%d", size - 8);
1921 }
1922 }
1923 return;
1924 case ARMin_NUnaryS:
1925 vex_printf("%s%s%s ",
1926 showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1927 showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1928 showARMNeonDataSize(i));
1929 ppARMNRS(i->ARMin.NUnaryS.dst);
1930 vex_printf(", ");
1931 ppARMNRS(i->ARMin.NUnaryS.src);
1932 return;
1933 case ARMin_NShift:
1934 vex_printf("%s%s%s ",
1935 showARMNeonShiftOp(i->ARMin.NShift.op),
1936 showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1937 showARMNeonDataSize(i));
1938 ppHRegARM(i->ARMin.NShift.dst);
1939 vex_printf(", ");
1940 ppHRegARM(i->ARMin.NShift.argL);
1941 vex_printf(", ");
1942 ppHRegARM(i->ARMin.NShift.argR);
1943 return;
1944 case ARMin_NShl64:
1945 vex_printf("vshl.i64 ");
1946 ppHRegARM(i->ARMin.NShl64.dst);
1947 vex_printf(", ");
1948 ppHRegARM(i->ARMin.NShl64.src);
1949 vex_printf(", #%u", i->ARMin.NShl64.amt);
1950 return;
1951 case ARMin_NDual:
1952 vex_printf("%s%s%s ",
1953 showARMNeonDualOp(i->ARMin.NDual.op),
1954 showARMNeonDualOpDataType(i->ARMin.NDual.op),
1955 showARMNeonDataSize(i));
1956 ppHRegARM(i->ARMin.NDual.arg1);
1957 vex_printf(", ");
1958 ppHRegARM(i->ARMin.NDual.arg2);
1959 return;
1960 case ARMin_NBinary:
1961 vex_printf("%s%s%s",
1962 showARMNeonBinOp(i->ARMin.NBinary.op),
1963 showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1964 showARMNeonDataSize(i));
1965 vex_printf(" ");
1966 ppHRegARM(i->ARMin.NBinary.dst);
1967 vex_printf(", ");
1968 ppHRegARM(i->ARMin.NBinary.argL);
1969 vex_printf(", ");
1970 ppHRegARM(i->ARMin.NBinary.argR);
1971 return;
1972 case ARMin_NeonImm:
1973 vex_printf("vmov ");
1974 ppHRegARM(i->ARMin.NeonImm.dst);
1975 vex_printf(", ");
1976 ppARMNImm(i->ARMin.NeonImm.imm);
1977 return;
1978 case ARMin_NCMovQ:
1979 vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1980 ppHRegARM(i->ARMin.NCMovQ.dst);
1981 vex_printf(", ");
1982 ppHRegARM(i->ARMin.NCMovQ.src);
1983 return;
1984 case ARMin_Add32:
1985 vex_printf("add32 ");
1986 ppHRegARM(i->ARMin.Add32.rD);
1987 vex_printf(", ");
1988 ppHRegARM(i->ARMin.Add32.rN);
1989 vex_printf(", ");
1990 vex_printf("%d", i->ARMin.Add32.imm32);
1991 return;
1992 case ARMin_EvCheck:
1993 vex_printf("(evCheck) ldr r12,");
1994 ppARMAMode1(i->ARMin.EvCheck.amCounter);
1995 vex_printf("; subs r12,r12,$1; str r12,");
1996 ppARMAMode1(i->ARMin.EvCheck.amCounter);
1997 vex_printf("; bpl nofail; ldr r12,");
1998 ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
1999 vex_printf("; bx r12; nofail:");
2000 return;
2001 case ARMin_ProfInc:
2002          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2003                     "movt r12,HI16($NotKnownYet); "
2004 "ldr r11,[r12]; "
2005 "adds r11,r11,$1; "
2006 "str r11,[r12]; "
2007 "ldr r11,[r12+4]; "
2008 "adc r11,r11,$0; "
2009 "str r11,[r12+4]");
2010 return;
2011 default:
2012 vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2013 vpanic("ppARMInstr(1)");
2014 return;
2015 }
2016 }
2017
2018
2019 /* --------- Helpers for register allocation. --------- */
2020
2021 void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
2022 {
2023 vassert(mode64 == False);
2024 initHRegUsage(u);
2025 switch (i->tag) {
2026 case ARMin_Alu:
2027 addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2028 addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2029 addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2030 return;
2031 case ARMin_Shift:
2032 addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2033 addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2034 addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2035 return;
2036 case ARMin_Unary:
2037 addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2038 addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2039 return;
2040 case ARMin_CmpOrTst:
2041 addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2042 addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2043 return;
2044 case ARMin_Mov:
2045 addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2046 addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2047 return;
2048 case ARMin_Imm32:
2049 addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2050 return;
2051 case ARMin_LdSt32:
2052 addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2053 if (i->ARMin.LdSt32.isLoad) {
2054 addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2055 if (i->ARMin.LdSt32.cc != ARMcc_AL)
2056 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2057 } else {
2058 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2059 }
2060 return;
2061 case ARMin_LdSt16:
2062 addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2063 if (i->ARMin.LdSt16.isLoad) {
2064 addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2065 if (i->ARMin.LdSt16.cc != ARMcc_AL)
2066 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2067 } else {
2068 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2069 }
2070 return;
2071 case ARMin_LdSt8U:
2072 addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2073 if (i->ARMin.LdSt8U.isLoad) {
2074 addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2075 if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2076 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2077 } else {
2078 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2079 }
2080 return;
2081 case ARMin_Ld8S:
2082 addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2083 addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2084 if (i->ARMin.Ld8S.cc != ARMcc_AL)
2085 addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2086 return;
2087 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2088 conditionally exit the block. Hence we only need to list (1)
2089 the registers that they read, and (2) the registers that they
2090 write in the case where the block is not exited. (2) is
2091 empty, hence only (1) is relevant here. */
2092 case ARMin_XDirect:
2093 addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2094 return;
2095 case ARMin_XIndir:
2096 addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2097 addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2098 return;
2099 case ARMin_XAssisted:
2100 addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2101 addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2102 return;
2103 case ARMin_CMov:
2104 addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2105 addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
2106 addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2107 return;
2108 case ARMin_Call:
2109 /* logic and comments copied/modified from x86 back end */
2110 /* This is a bit subtle. */
2111 /* First off, claim it trashes all the caller-saved regs
2112 which fall within the register allocator's jurisdiction.
2113 These I believe to be r0,1,2,3. If it turns out that r9
2114 is also caller-saved, then we'll have to add that here
2115 too. */
2116 addHRegUse(u, HRmWrite, hregARM_R0());
2117 addHRegUse(u, HRmWrite, hregARM_R1());
2118 addHRegUse(u, HRmWrite, hregARM_R2());
2119 addHRegUse(u, HRmWrite, hregARM_R3());
2120 /* Now we have to state any parameter-carrying registers
2121 which might be read. This depends on nArgRegs. */
2122 switch (i->ARMin.Call.nArgRegs) {
2123 case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2124 case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2125 case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2126 case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2127 case 0: break;
2128 default: vpanic("getRegUsage_ARM:Call:regparms");
2129 }
2130 /* Finally, there is the issue that the insn trashes a
2131 register because the literal target address has to be
2132 loaded into a register. Fortunately, for the nArgRegs=
2133 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2134 this does not cause any further damage. For the
2135 nArgRegs=4 case, we'll have to choose another register
2136 arbitrarily since all the caller saved regs are used for
2137 parameters, and so we might as well choose r11.
2138 */
2139 if (i->ARMin.Call.nArgRegs == 4)
2140 addHRegUse(u, HRmWrite, hregARM_R11());
2141 /* Upshot of this is that the assembler really must observe
2142 the here-stated convention of which register to use as an
2143 address temporary, depending on nArgRegs: 0==r0,
2144 1==r1, 2==r2, 3==r3, 4==r11 */
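         /* Illustrative example (editor's note): for nArgRegs == 2
            the resulting usage set is
               written: r0 r1 r2 r3   (caller-saved, trashed)
               read:    r0 r1         (argument registers in use)
            so the emitter below uses r2 as the address temporary. */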
2145 return;
2146 case ARMin_Mul:
2147 addHRegUse(u, HRmRead, hregARM_R2());
2148 addHRegUse(u, HRmRead, hregARM_R3());
2149 addHRegUse(u, HRmWrite, hregARM_R0());
2150 if (i->ARMin.Mul.op != ARMmul_PLAIN)
2151 addHRegUse(u, HRmWrite, hregARM_R1());
2152 return;
2153 case ARMin_LdrEX:
2154 addHRegUse(u, HRmRead, hregARM_R4());
2155 addHRegUse(u, HRmWrite, hregARM_R2());
2156 if (i->ARMin.LdrEX.szB == 8)
2157 addHRegUse(u, HRmWrite, hregARM_R3());
2158 return;
2159 case ARMin_StrEX:
2160 addHRegUse(u, HRmRead, hregARM_R4());
2161 addHRegUse(u, HRmWrite, hregARM_R0());
2162 addHRegUse(u, HRmRead, hregARM_R2());
2163 if (i->ARMin.StrEX.szB == 8)
2164 addHRegUse(u, HRmRead, hregARM_R3());
2165 return;
2166 case ARMin_VLdStD:
2167 addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2168 if (i->ARMin.VLdStD.isLoad) {
2169 addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2170 } else {
2171 addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2172 }
2173 return;
2174 case ARMin_VLdStS:
2175 addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2176 if (i->ARMin.VLdStS.isLoad) {
2177 addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2178 } else {
2179 addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2180 }
2181 return;
2182 case ARMin_VAluD:
2183 addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2184 addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2185 addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2186 return;
2187 case ARMin_VAluS:
2188 addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2189 addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2190 addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2191 return;
2192 case ARMin_VUnaryD:
2193 addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2194 addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2195 return;
2196 case ARMin_VUnaryS:
2197 addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2198 addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2199 return;
2200 case ARMin_VCmpD:
2201 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2202 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2203 return;
2204 case ARMin_VCMovD:
2205 addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2206 addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2207 addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2208 return;
2209 case ARMin_VCMovS:
2210 addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2211 addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2212 addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2213 return;
2214 case ARMin_VCvtSD:
2215 addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2216 addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
2217 return;
2218 case ARMin_VXferD:
2219 if (i->ARMin.VXferD.toD) {
2220 addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2221 addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2222 addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2223 } else {
2224 addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2225 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2226 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2227 }
2228 return;
2229 case ARMin_VXferS:
2230 if (i->ARMin.VXferS.toS) {
2231 addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2232 addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2233 } else {
2234 addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2235 addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2236 }
2237 return;
2238 case ARMin_VCvtID:
2239 addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2240 addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2241 return;
2242 case ARMin_FPSCR:
2243 if (i->ARMin.FPSCR.toFPSCR)
2244 addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2245 else
2246 addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2247 return;
2248 case ARMin_MFence:
2249 return;
2250 case ARMin_CLREX:
2251 return;
2252 case ARMin_NLdStQ:
2253 if (i->ARMin.NLdStQ.isLoad)
2254 addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2255 else
2256 addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2257 addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2258 return;
2259 case ARMin_NLdStD:
2260 if (i->ARMin.NLdStD.isLoad)
2261 addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2262 else
2263 addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2264 addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2265 return;
2266 case ARMin_NUnary:
2267 addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2268 addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2269 return;
2270 case ARMin_NUnaryS:
2271 addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2272 addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2273 return;
2274 case ARMin_NShift:
2275 addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2276 addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2277 addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2278 return;
2279 case ARMin_NShl64:
2280 addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2281 addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2282 return;
2283 case ARMin_NDual:
2284 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2285 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2286 addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2287 addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2288 return;
2289 case ARMin_NBinary:
2290 addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2291 /* TODO: sometimes dst is also being read! */
2292 // XXX fix this
2293 addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2294 addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2295 return;
2296 case ARMin_NeonImm:
2297 addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2298 return;
2299 case ARMin_NCMovQ:
2300 addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2301 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2302 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2303 return;
2304 case ARMin_Add32:
2305 addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2306 addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2307 return;
2308 case ARMin_EvCheck:
2309 /* We expect both amodes only to mention r8, so this is in
2310 fact pointless, since r8 isn't allocatable, but
2311 anyway.. */
2312 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2313 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2314 addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2315 return;
2316 case ARMin_ProfInc:
2317 addHRegUse(u, HRmWrite, hregARM_R12());
2318 addHRegUse(u, HRmWrite, hregARM_R11());
2319 return;
2320 default:
2321 ppARMInstr(i);
2322 vpanic("getRegUsage_ARMInstr");
2323 }
2324 }
2325
2326
2327 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2328 {
2329 vassert(mode64 == False);
2330 switch (i->tag) {
2331 case ARMin_Alu:
2332 i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2333 i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2334 mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2335 return;
2336 case ARMin_Shift:
2337 i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2338 i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2339 mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2340 return;
2341 case ARMin_Unary:
2342 i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2343 i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2344 return;
2345 case ARMin_CmpOrTst:
2346 i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2347 mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2348 return;
2349 case ARMin_Mov:
2350 i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2351 mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2352 return;
2353 case ARMin_Imm32:
2354 i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2355 return;
2356 case ARMin_LdSt32:
2357 i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2358 mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2359 return;
2360 case ARMin_LdSt16:
2361 i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2362 mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2363 return;
2364 case ARMin_LdSt8U:
2365 i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2366 mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2367 return;
2368 case ARMin_Ld8S:
2369 i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2370 mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2371 return;
2372 case ARMin_XDirect:
2373 mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2374 return;
2375 case ARMin_XIndir:
2376 i->ARMin.XIndir.dstGA
2377 = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2378 mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2379 return;
2380 case ARMin_XAssisted:
2381 i->ARMin.XAssisted.dstGA
2382 = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2383 mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2384 return;
2385 case ARMin_CMov:
2386 i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2387 mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2388 return;
2389 case ARMin_Call:
2390 return;
2391 case ARMin_Mul:
2392 return;
2393 case ARMin_LdrEX:
2394 return;
2395 case ARMin_StrEX:
2396 return;
2397 case ARMin_VLdStD:
2398 i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2399 mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2400 return;
2401 case ARMin_VLdStS:
2402 i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2403 mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2404 return;
2405 case ARMin_VAluD:
2406 i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2407 i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2408 i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2409 return;
2410 case ARMin_VAluS:
2411 i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2412 i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2413 i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2414 return;
2415 case ARMin_VUnaryD:
2416 i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2417 i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2418 return;
2419 case ARMin_VUnaryS:
2420 i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2421 i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2422 return;
2423 case ARMin_VCmpD:
2424 i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2425 i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2426 return;
2427 case ARMin_VCMovD:
2428 i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2429 i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2430 return;
2431 case ARMin_VCMovS:
2432 i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2433 i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2434 return;
2435 case ARMin_VCvtSD:
2436 i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2437 i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2438 return;
2439 case ARMin_VXferD:
2440 i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2441 i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2442 i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2443 return;
2444 case ARMin_VXferS:
2445 i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2446 i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2447 return;
2448 case ARMin_VCvtID:
2449 i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2450 i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2451 return;
2452 case ARMin_FPSCR:
2453 i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2454 return;
2455 case ARMin_MFence:
2456 return;
2457 case ARMin_CLREX:
2458 return;
2459 case ARMin_NLdStQ:
2460 i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2461 mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2462 return;
2463 case ARMin_NLdStD:
2464 i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2465 mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2466 return;
2467 case ARMin_NUnary:
2468 i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2469 i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2470 return;
2471 case ARMin_NUnaryS:
2472 i->ARMin.NUnaryS.src->reg
2473 = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2474 i->ARMin.NUnaryS.dst->reg
2475 = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2476 return;
2477 case ARMin_NShift:
2478 i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2479 i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2480 i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2481 return;
2482 case ARMin_NShl64:
2483 i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2484 i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2485 return;
2486 case ARMin_NDual:
2487 i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2488 i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2489 return;
2490 case ARMin_NBinary:
2491 i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2492 i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2493 i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2494 return;
2495 case ARMin_NeonImm:
2496 i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2497 return;
2498 case ARMin_NCMovQ:
2499 i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2500 i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2501 return;
2502 case ARMin_Add32:
2503 i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2504 i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2505 return;
2506 case ARMin_EvCheck:
2507 /* We expect both amodes only to mention r8, so this is in
2508 fact pointless, since r8 isn't allocatable, but
2509 anyway.. */
2510 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2511 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2512 return;
2513 case ARMin_ProfInc:
2514 /* hardwires r11 and r12 -- nothing to modify. */
2515 return;
2516 default:
2517 ppARMInstr(i);
2518 vpanic("mapRegs_ARMInstr");
2519 }
2520 }
2521
2522 /* Figure out if i represents a reg-reg move, and if so assign the
2523 source and destination to *src and *dst. If in doubt say No. Used
2524 by the register allocator to do move coalescing.
2525 */
2526 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
2527 {
2528 /* Moves between integer regs */
2529 switch (i->tag) {
2530 case ARMin_Mov:
2531 if (i->ARMin.Mov.src->tag == ARMri84_R) {
2532 *src = i->ARMin.Mov.src->ARMri84.R.reg;
2533 *dst = i->ARMin.Mov.dst;
2534 return True;
2535 }
2536 break;
2537 case ARMin_VUnaryD:
2538 if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2539 *src = i->ARMin.VUnaryD.src;
2540 *dst = i->ARMin.VUnaryD.dst;
2541 return True;
2542 }
2543 break;
2544 case ARMin_VUnaryS:
2545 if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2546 *src = i->ARMin.VUnaryS.src;
2547 *dst = i->ARMin.VUnaryS.dst;
2548 return True;
2549 }
2550 break;
2551 case ARMin_NUnary:
2552 if (i->ARMin.NUnary.op == ARMneon_COPY) {
2553 *src = i->ARMin.NUnary.src;
2554 *dst = i->ARMin.NUnary.dst;
2555 return True;
2556 }
2557 break;
2558 default:
2559 break;
2560 }
2561
2562 return False;
2563 }
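/* Examples (editor's note): a register-form "mov r3, r5"
   (ARMri84_R) is reported as a coalescible move, as are fcpyd/fcpys
   and NEON copies tagged ARMvfpu_COPY/ARMneon_COPY; an
   immediate-form "mov r3, #1" is not, since it has no source
   register. */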
2564
2565
2566 /* Generate arm spill/reload instructions under the direction of the
2567 register allocator. Note it's critical these don't write the
2568 condition codes. */
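/* Worked example (editor's sketch): spilling a double register at
   baseblock offset 1032 exceeds the +/-1020 byte range of VFP
   load/store offsets, so genSpill_ARM below emits two instructions:

      add  r12, r8, #1024     @ ARMRI84_I84(1, 11): 1 ror 22 == 1024
      fstd dN, [r12, #8]

   Integer spills always fit in a single "str rN, [r8, #off]", since
   the LDR/STR immediate range is 0..4095. */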
2569
2570 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2571 HReg rreg, Int offsetB, Bool mode64 )
2572 {
2573 HRegClass rclass;
2574 vassert(offsetB >= 0);
2575 vassert(!hregIsVirtual(rreg));
2576 vassert(mode64 == False);
2577 *i1 = *i2 = NULL;
2578 rclass = hregClass(rreg);
2579 switch (rclass) {
2580 case HRcInt32:
2581 vassert(offsetB <= 4095);
2582 *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2583 rreg,
2584 ARMAMode1_RI(hregARM_R8(), offsetB) );
2585 return;
2586 case HRcFlt32:
2587 case HRcFlt64: {
2588 HReg r8 = hregARM_R8(); /* baseblock */
2589 HReg r12 = hregARM_R12(); /* spill temp */
2590 HReg base = r8;
2591 vassert(0 == (offsetB & 3));
2592 if (offsetB >= 1024) {
2593 Int offsetKB = offsetB / 1024;
2594 /* r12 = r8 + (1024 * offsetKB) */
2595 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2596 ARMRI84_I84(offsetKB, 11));
2597 offsetB -= (1024 * offsetKB);
2598 base = r12;
2599 }
2600 vassert(offsetB <= 1020);
2601 if (rclass == HRcFlt32) {
2602 *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2603 rreg,
2604 mkARMAModeV(base, offsetB) );
2605 } else {
2606 *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2607 rreg,
2608 mkARMAModeV(base, offsetB) );
2609 }
2610 return;
2611 }
2612 case HRcVec128: {
2613 HReg r8 = hregARM_R8();
2614 HReg r12 = hregARM_R12();
2615 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2616 *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2617 return;
2618 }
2619 default:
2620 ppHRegClass(rclass);
2621 vpanic("genSpill_ARM: unimplemented regclass");
2622 }
2623 }
2624
2625 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2626 HReg rreg, Int offsetB, Bool mode64 )
2627 {
2628 HRegClass rclass;
2629 vassert(offsetB >= 0);
2630 vassert(!hregIsVirtual(rreg));
2631 vassert(mode64 == False);
2632 *i1 = *i2 = NULL;
2633 rclass = hregClass(rreg);
2634 switch (rclass) {
2635 case HRcInt32:
2636 vassert(offsetB <= 4095);
2637 *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2638 rreg,
2639 ARMAMode1_RI(hregARM_R8(), offsetB) );
2640 return;
2641 case HRcFlt32:
2642 case HRcFlt64: {
2643 HReg r8 = hregARM_R8(); /* baseblock */
2644 HReg r12 = hregARM_R12(); /* spill temp */
2645 HReg base = r8;
2646 vassert(0 == (offsetB & 3));
2647 if (offsetB >= 1024) {
2648 Int offsetKB = offsetB / 1024;
2649 /* r12 = r8 + (1024 * offsetKB) */
2650 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2651 ARMRI84_I84(offsetKB, 11));
2652 offsetB -= (1024 * offsetKB);
2653 base = r12;
2654 }
2655 vassert(offsetB <= 1020);
2656 if (rclass == HRcFlt32) {
2657 *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2658 rreg,
2659 mkARMAModeV(base, offsetB) );
2660 } else {
2661 *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2662 rreg,
2663 mkARMAModeV(base, offsetB) );
2664 }
2665 return;
2666 }
2667 case HRcVec128: {
2668 HReg r8 = hregARM_R8();
2669 HReg r12 = hregARM_R12();
2670 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2671 *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2672 return;
2673 }
2674 default:
2675 ppHRegClass(rclass);
2676 vpanic("genReload_ARM: unimplemented regclass");
2677 }
2678 }
2679
2680
2681 /* Emit an instruction into buf and return the number of bytes used.
2682 Note that buf is not the insn's final place, and therefore it is
2683 imperative to emit position-independent code. */
2684
2685 static inline UChar iregNo ( HReg r )
2686 {
2687 UInt n;
2688 vassert(hregClass(r) == HRcInt32);
2689 vassert(!hregIsVirtual(r));
2690 n = hregNumber(r);
2691 vassert(n <= 15);
2692 return toUChar(n);
2693 }
2694
2695 static inline UChar dregNo ( HReg r )
2696 {
2697 UInt n;
2698 if (hregClass(r) != HRcFlt64)
2699 ppHRegClass(hregClass(r));
2700 vassert(hregClass(r) == HRcFlt64);
2701 vassert(!hregIsVirtual(r));
2702 n = hregNumber(r);
2703 vassert(n <= 31);
2704 return toUChar(n);
2705 }
2706
2707 static inline UChar fregNo ( HReg r )
2708 {
2709 UInt n;
2710 vassert(hregClass(r) == HRcFlt32);
2711 vassert(!hregIsVirtual(r));
2712 n = hregNumber(r);
2713 vassert(n <= 31);
2714 return toUChar(n);
2715 }
2716
2717 static inline UChar qregNo ( HReg r )
2718 {
2719 UInt n;
2720 vassert(hregClass(r) == HRcVec128);
2721 vassert(!hregIsVirtual(r));
2722 n = hregNumber(r);
2723 vassert(n <= 15);
2724 return toUChar(n);
2725 }
2726
2727 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2728 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2729 #define X0000 BITS4(0,0,0,0)
2730 #define X0001 BITS4(0,0,0,1)
2731 #define X0010 BITS4(0,0,1,0)
2732 #define X0011 BITS4(0,0,1,1)
2733 #define X0100 BITS4(0,1,0,0)
2734 #define X0101 BITS4(0,1,0,1)
2735 #define X0110 BITS4(0,1,1,0)
2736 #define X0111 BITS4(0,1,1,1)
2737 #define X1000 BITS4(1,0,0,0)
2738 #define X1001 BITS4(1,0,0,1)
2739 #define X1010 BITS4(1,0,1,0)
2740 #define X1011 BITS4(1,0,1,1)
2741 #define X1100 BITS4(1,1,0,0)
2742 #define X1101 BITS4(1,1,0,1)
2743 #define X1110 BITS4(1,1,1,0)
2744 #define X1111 BITS4(1,1,1,1)
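/* Example: X1110 == BITS4(1,1,1,0) == 0xE, which is also the AL
   (always) condition nibble used by most of the fixed encodings
   below. */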
2745
2746 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2747 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2748 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2749 (((zzx3) & 0xF) << 12))
2750
2751 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2752 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2753 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2754 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2755
2756 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2757 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2758 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2759 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2760
2761 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2762 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2763 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2764 (((zzx0) & 0xF) << 0))
2765
2766 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2767 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2768 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2769 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2770 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2771
2772 #define XX______(zzx7,zzx6) \
2773 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
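/* Example (editor's note): the CLZ case in emit_ARMInstr builds
   XXXXXXXX(X1110,X0001,X0110,X1111,rDst,X1111,X0001,rSrc); with
   rDst == 0 and rSrc == 1 this evaluates to 0xE16F0F11, the
   encoding of "clz r0, r1". */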
2774
2775 /* Generate a skeletal insn that involves an RI84 shifter operand.
2776 Returns a word which is all zeroes apart from bits 25 and 11..0,
2777 since it is those that encode the shifter operand (at least to the
2778 extent that we care about it.) */
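/* (Editor's note: the I84 form is ARM's "modified immediate": an
   8-bit value rotated right by twice the 4-bit rotate field, so
   e.g. imm8 == 0xFF with imm4 == 4 encodes 0xFF ror 8 ==
   0xFF000000.) */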
2779 static UInt skeletal_RI84 ( ARMRI84* ri )
2780 {
2781 UInt instr;
2782 if (ri->tag == ARMri84_I84) {
2783 vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2784 vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2785 instr = 1 << 25;
2786 instr |= (ri->ARMri84.I84.imm4 << 8);
2787 instr |= ri->ARMri84.I84.imm8;
2788 } else {
2789 instr = 0 << 25;
2790 instr |= iregNo(ri->ARMri84.R.reg);
2791 }
2792 return instr;
2793 }
2794
2795 /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
2796 11..7. */
2797 static UInt skeletal_RI5 ( ARMRI5* ri )
2798 {
2799 UInt instr;
2800 if (ri->tag == ARMri5_I5) {
2801 UInt imm5 = ri->ARMri5.I5.imm5;
2802 vassert(imm5 >= 1 && imm5 <= 31);
2803 instr = 0 << 4;
2804 instr |= imm5 << 7;
2805 } else {
2806 instr = 1 << 4;
2807 instr |= iregNo(ri->ARMri5.R.reg) << 8;
2808 }
2809 return instr;
2810 }
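/* Examples (editor's note): a constant shift "lsl r0, r1, #3"
   carries 3 in bits 11..7 with bit 4 clear; a register-specified
   shift "lsl r0, r1, r2" sets bit 4 and carries r2 in bits 11..8. */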
2811
2812
2813 /* Get an immediate into a register, using only that
2814 register. (very lame..) */
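/* Worked example (editor's sketch): on ARMv7 (archlevel > 6),
   imm32 == 0xDEADBEEF with rD == 12 produces

      movw r12, #0xBEEF       @ 0xE30BCEEF
      movt r12, #0xDEAD       @ 0xE34DCEAD

   On pre-v7 the value is instead assembled from a MOV of one
   rotated 8-bit chunk followed by up to three ORRs of the remaining
   nonzero chunks. */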
2815 static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
2816 {
2817 UInt instr;
2818 vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2819 #if 0
2820 if (0 == (imm32 & ~0xFF)) {
2821    /* mov with an immediate shifter operand of (0, imm32) (??) */
2822 instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2823 instr |= imm32;
2824 *p++ = instr;
2825 } else {
2826 // this is very bad; causes Dcache pollution
2827 // ldr rD, [pc]
2828 instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2829 *p++ = instr;
2830 // b .+8
2831 instr = 0xEA000000;
2832 *p++ = instr;
2833 // .word imm32
2834 *p++ = imm32;
2835 }
2836 #else
2837 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2838 /* Generate movw rD, #low16. Then, if the high 16 are
2839 nonzero, generate movt rD, #high16. */
2840 UInt lo16 = imm32 & 0xFFFF;
2841 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2842 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2843 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2844 lo16 & 0xF);
2845 *p++ = instr;
2846 if (hi16 != 0) {
2847 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2848 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2849 hi16 & 0xF);
2850 *p++ = instr;
2851 }
2852 } else {
2853 UInt imm, rot;
2854 UInt op = X1010;
2855 UInt rN = 0;
2856 if ((imm32 & 0xFF) || (imm32 == 0)) {
2857 imm = imm32 & 0xFF;
2858 rot = 0;
2859 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2860 *p++ = instr;
2861 op = X1000;
2862 rN = rD;
2863 }
2864 if (imm32 & 0xFF000000) {
2865 imm = (imm32 >> 24) & 0xFF;
2866 rot = 4;
2867 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2868 *p++ = instr;
2869 op = X1000;
2870 rN = rD;
2871 }
2872 if (imm32 & 0xFF0000) {
2873 imm = (imm32 >> 16) & 0xFF;
2874 rot = 8;
2875 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2876 *p++ = instr;
2877 op = X1000;
2878 rN = rD;
2879 }
2880 if (imm32 & 0xFF00) {
2881 imm = (imm32 >> 8) & 0xFF;
2882 rot = 12;
2883 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2884 *p++ = instr;
2885 op = X1000;
2886 rN = rD;
2887 }
2888 }
2889 #endif
2890 return p;
2891 }
2892
2893 /* Get an immediate into a register, using only that register, and
2894 generating exactly 2 instructions, regardless of the value of the
2895 immediate. This is used when generating sections of code that need
2896 to be patched later, so as to guarantee a specific size. */
2897 static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2898 {
2899 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2900 /* Generate movw rD, #low16 ; movt rD, #high16. */
2901 UInt lo16 = imm32 & 0xFFFF;
2902 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2903 UInt instr;
2904 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2905 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2906 lo16 & 0xF);
2907 *p++ = instr;
2908 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2909 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2910 hi16 & 0xF);
2911 *p++ = instr;
2912 } else {
2913 vassert(0); /* lose */
2914 }
2915 return p;
2916 }
2917
2918 /* Check whether p points at a 2-insn sequence cooked up by
2919 imm32_to_iregNo_EXACTLY2(). */
2920 static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2921 {
2922 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2923       /* Check for movw rD, #low16 ; movt rD, #high16. */
2924 UInt lo16 = imm32 & 0xFFFF;
2925 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2926 UInt i0, i1;
2927 i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2928 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2929 lo16 & 0xF);
2930 i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2931 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2932 hi16 & 0xF);
2933 return p[0] == i0 && p[1] == i1;
2934 } else {
2935 vassert(0); /* lose */
2936 }
2937 }
2938
2939
2940 static UInt* do_load_or_store32 ( UInt* p,
2941 Bool isLoad, UInt rD, ARMAMode1* am )
2942 {
2943 vassert(rD <= 12);
2944 vassert(am->tag == ARMam1_RI); // RR case is not handled
2945 UInt bB = 0;
2946 UInt bL = isLoad ? 1 : 0;
2947 Int simm12;
2948 UInt instr, bP;
2949 if (am->ARMam1.RI.simm13 < 0) {
2950 bP = 0;
2951 simm12 = -am->ARMam1.RI.simm13;
2952 } else {
2953 bP = 1;
2954 simm12 = am->ARMam1.RI.simm13;
2955 }
2956 vassert(simm12 >= 0 && simm12 <= 4095);
2957 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
2958 iregNo(am->ARMam1.RI.reg),
2959 rD);
2960 instr |= simm12;
2961 *p++ = instr;
2962 return p;
2963 }
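/* Example (editor's note): simm13 == -4 yields bP == 0 (bit 23, the
   U add/subtract-offset bit) and simm12 == 4, so a load assembles
   as "ldr rD, [rN, #-4]". */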
2964
2965
2966 /* Emit an instruction into buf and return the number of bytes used.
2967 Note that buf is not the insn's final place, and therefore it is
2968 imperative to emit position-independent code. If the emitted
2969 instruction was a profiler inc, set *is_profInc to True, else
2970 leave it unchanged. */
2971
2972 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
2973 UChar* buf, Int nbuf, ARMInstr* i,
2974 Bool mode64,
2975 void* disp_cp_chain_me_to_slowEP,
2976 void* disp_cp_chain_me_to_fastEP,
2977 void* disp_cp_xindir,
2978 void* disp_cp_xassisted )
2979 {
2980 UInt* p = (UInt*)buf;
2981 vassert(nbuf >= 32);
2982 vassert(mode64 == False);
2983 vassert(0 == (((HWord)buf) & 3));
2984
2985 switch (i->tag) {
2986 case ARMin_Alu: {
2987 UInt instr, subopc;
2988 UInt rD = iregNo(i->ARMin.Alu.dst);
2989 UInt rN = iregNo(i->ARMin.Alu.argL);
2990 ARMRI84* argR = i->ARMin.Alu.argR;
2991 switch (i->ARMin.Alu.op) {
2992 case ARMalu_ADDS: /* fallthru */
2993 case ARMalu_ADD: subopc = X0100; break;
2994 case ARMalu_ADC: subopc = X0101; break;
2995 case ARMalu_SUBS: /* fallthru */
2996 case ARMalu_SUB: subopc = X0010; break;
2997 case ARMalu_SBC: subopc = X0110; break;
2998 case ARMalu_AND: subopc = X0000; break;
2999 case ARMalu_BIC: subopc = X1110; break;
3000 case ARMalu_OR: subopc = X1100; break;
3001 case ARMalu_XOR: subopc = X0001; break;
3002 default: goto bad;
3003 }
3004 instr = skeletal_RI84(argR);
3005 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3006 (subopc << 1) & 0xF, rN, rD);
3007 if (i->ARMin.Alu.op == ARMalu_ADDS
3008 || i->ARMin.Alu.op == ARMalu_SUBS) {
3009 instr |= 1<<20; /* set the S bit */
3010 }
3011 *p++ = instr;
3012 goto done;
3013 }
3014 case ARMin_Shift: {
3015 UInt instr, subopc;
3016 UInt rD = iregNo(i->ARMin.Shift.dst);
3017 UInt rM = iregNo(i->ARMin.Shift.argL);
3018 ARMRI5* argR = i->ARMin.Shift.argR;
3019 switch (i->ARMin.Shift.op) {
3020 case ARMsh_SHL: subopc = X0000; break;
3021 case ARMsh_SHR: subopc = X0001; break;
3022 case ARMsh_SAR: subopc = X0010; break;
3023 default: goto bad;
3024 }
3025 instr = skeletal_RI5(argR);
3026 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3027 instr |= (subopc & 3) << 5;
3028 *p++ = instr;
3029 goto done;
3030 }
3031 case ARMin_Unary: {
3032 UInt instr;
3033 UInt rDst = iregNo(i->ARMin.Unary.dst);
3034 UInt rSrc = iregNo(i->ARMin.Unary.src);
3035 switch (i->ARMin.Unary.op) {
3036 case ARMun_CLZ:
3037 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3038 rDst,X1111,X0001,rSrc);
3039 *p++ = instr;
3040 goto done;
3041 case ARMun_NEG: /* RSB rD,rS,#0 */
3042 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3043 *p++ = instr;
3044 goto done;
3045 case ARMun_NOT: {
3046 UInt subopc = X1111; /* MVN */
3047 instr = rSrc;
3048 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3049 (subopc << 1) & 0xF, 0, rDst);
3050 *p++ = instr;
3051 goto done;
3052 }
3053 default:
3054 break;
3055 }
3056 goto bad;
3057 }
3058 case ARMin_CmpOrTst: {
3059 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3060 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3061 UInt SBZ = 0;
3062 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3063 ((subopc << 1) & 0xF) | 1,
3064 iregNo(i->ARMin.CmpOrTst.argL), SBZ );
3065 *p++ = instr;
3066 goto done;
3067 }
3068 case ARMin_Mov: {
3069 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
3070 UInt subopc = X1101; /* MOV */
3071 UInt SBZ = 0;
3072 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3073 (subopc << 1) & 0xF, SBZ,
3074 iregNo(i->ARMin.Mov.dst));
3075 *p++ = instr;
3076 goto done;
3077 }
3078 case ARMin_Imm32: {
3079 p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
3080 i->ARMin.Imm32.imm32 );
3081 goto done;
3082 }
3083 case ARMin_LdSt32:
3084 case ARMin_LdSt8U: {
3085 UInt bL, bB;
3086 HReg rD;
3087 ARMAMode1* am;
3088 ARMCondCode cc;
3089 if (i->tag == ARMin_LdSt32) {
3090 bB = 0;
3091 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3092 am = i->ARMin.LdSt32.amode;
3093 rD = i->ARMin.LdSt32.rD;
3094 cc = i->ARMin.LdSt32.cc;
3095 } else {
3096 bB = 1;
3097 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3098 am = i->ARMin.LdSt8U.amode;
3099 rD = i->ARMin.LdSt8U.rD;
3100 cc = i->ARMin.LdSt8U.cc;
3101 }
3102 vassert(cc != ARMcc_NV);
3103 if (am->tag == ARMam1_RI) {
3104 Int simm12;
3105 UInt instr, bP;
3106 if (am->ARMam1.RI.simm13 < 0) {
3107 bP = 0;
3108 simm12 = -am->ARMam1.RI.simm13;
3109 } else {
3110 bP = 1;
3111 simm12 = am->ARMam1.RI.simm13;
3112 }
3113 vassert(simm12 >= 0 && simm12 <= 4095);
3114 instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3115 iregNo(am->ARMam1.RI.reg),
3116 iregNo(rD));
3117 instr |= simm12;
3118 *p++ = instr;
3119 goto done;
3120 } else {
3121 // RR case
3122 goto bad;
3123 }
3124 }
3125 case ARMin_LdSt16: {
3126 HReg rD = i->ARMin.LdSt16.rD;
3127 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3128 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3129 ARMAMode2* am = i->ARMin.LdSt16.amode;
3130 ARMCondCode cc = i->ARMin.LdSt16.cc;
3131 vassert(cc != ARMcc_NV);
3132 if (am->tag == ARMam2_RI) {
3133 HReg rN = am->ARMam2.RI.reg;
3134 Int simm8;
3135 UInt bP, imm8hi, imm8lo, instr;
3136 if (am->ARMam2.RI.simm9 < 0) {
3137 bP = 0;
3138 simm8 = -am->ARMam2.RI.simm9;
3139 } else {
3140 bP = 1;
3141 simm8 = am->ARMam2.RI.simm9;
3142 }
3143 vassert(simm8 >= 0 && simm8 <= 255);
3144 imm8hi = (simm8 >> 4) & 0xF;
3145 imm8lo = simm8 & 0xF;
3146 vassert(!(bL == 0 && bS == 1)); // "! signed store"
3147 /**/ if (bL == 0 && bS == 0) {
3148 // strh
3149 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
3150 iregNo(rD), imm8hi, X1011, imm8lo);
3151 *p++ = instr;
3152 goto done;
3153 }
3154 else if (bL == 1 && bS == 0) {
3155 // ldrh
3156 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
3157 iregNo(rD), imm8hi, X1011, imm8lo);
3158 *p++ = instr;
3159 goto done;
3160 }
3161 else if (bL == 1 && bS == 1) {
3162 // ldrsh
3163 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
3164 iregNo(rD), imm8hi, X1111, imm8lo);
3165 *p++ = instr;
3166 goto done;
3167 }
3168 else vassert(0); // ill-constructed insn
3169 } else {
3170 // RR case
3171 goto bad;
3172 }
3173 }
3174 case ARMin_Ld8S: {
3175 HReg rD = i->ARMin.Ld8S.rD;
3176 ARMAMode2* am = i->ARMin.Ld8S.amode;
3177 ARMCondCode cc = i->ARMin.Ld8S.cc;
3178 vassert(cc != ARMcc_NV);
3179 if (am->tag == ARMam2_RI) {
3180 HReg rN = am->ARMam2.RI.reg;
3181 Int simm8;
3182 UInt bP, imm8hi, imm8lo, instr;
3183 if (am->ARMam2.RI.simm9 < 0) {
3184 bP = 0;
3185 simm8 = -am->ARMam2.RI.simm9;
3186 } else {
3187 bP = 1;
3188 simm8 = am->ARMam2.RI.simm9;
3189 }
3190 vassert(simm8 >= 0 && simm8 <= 255);
3191 imm8hi = (simm8 >> 4) & 0xF;
3192 imm8lo = simm8 & 0xF;
3193 // ldrsb
3194 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
3195 iregNo(rD), imm8hi, X1101, imm8lo);
3196 *p++ = instr;
3197 goto done;
3198 } else {
3199 // RR case
3200 goto bad;
3201 }
3202 }
3203
3204 case ARMin_XDirect: {
3205 /* NB: what goes on here has to be very closely coordinated
3206 with the chainXDirect_ARM and unchainXDirect_ARM below. */
3207 /* We're generating chain-me requests here, so we need to be
3208 sure this is actually allowed -- no-redir translations
3209 can't use chain-me's. Hence: */
3210 vassert(disp_cp_chain_me_to_slowEP != NULL);
3211 vassert(disp_cp_chain_me_to_fastEP != NULL);
3212
3213 /* Use ptmp for backpatching conditional jumps. */
3214 UInt* ptmp = NULL;
3215
3216 /* First off, if this is conditional, create a conditional
3217 jump over the rest of it. Or at least, leave a space for
3218 it that we will shortly fill in. */
3219 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3220 vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3221 ptmp = p;
3222 *p++ = 0;
3223 }
3224
3225 /* Update the guest R15T. */
3226 /* movw r12, lo16(dstGA) */
3227 /* movt r12, hi16(dstGA) */
3228 /* str r12, amR15T */
3229 p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
3230 p = do_load_or_store32(p, False/*!isLoad*/,
3231 /*r*/12, i->ARMin.XDirect.amR15T);
3232
3233 /* --- FIRST PATCHABLE BYTE follows --- */
3234 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3235 calling to) backs up the return address, so as to find the
3236 address of the first patchable byte. So: don't change the
3237 number of instructions (3) below. */
3238 /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3239 /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3240 /* blx r12 (A1) */
3241 void* disp_cp_chain_me
3242 = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3243 : disp_cp_chain_me_to_slowEP;
3244 p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
3245 (UInt)Ptr_to_ULong(disp_cp_chain_me));
3246 *p++ = 0xE12FFF3C;
3247 /* --- END of PATCHABLE BYTES --- */
3248
3249 /* Fix up the conditional jump, if there was one. */
3250 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3251 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3252 vassert(delta > 0 && delta < 40);
3253 vassert((delta & 3) == 0);
3254 UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3255 vassert(notCond <= 13); /* Neither AL nor NV */
3256 delta = (delta >> 2) - 2;
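            /* (Editor's note: the "- 2" is because an ARM branch
               offset is relative to PC+8, i.e. two words past the
               branch instruction itself.) */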
3257 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3258 }
3259 goto done;
3260 }
3261
3262 case ARMin_XIndir: {
3263 /* We're generating transfers that could lead indirectly to a
3264 chain-me, so we need to be sure this is actually allowed
3265 -- no-redir translations are not allowed to reach normal
3266 translations without going through the scheduler. That
3267 means no XDirects or XIndirs out from no-redir
3268 translations. Hence: */
3269 vassert(disp_cp_xindir != NULL);
3270
3271 /* Use ptmp for backpatching conditional jumps. */
3272 UInt* ptmp = NULL;
3273
3274 /* First off, if this is conditional, create a conditional
3275 jump over the rest of it. Or at least, leave a space for
3276 it that we will shortly fill in. */
3277 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3278 vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3279 ptmp = p;
3280 *p++ = 0;
3281 }
3282
3283 /* Update the guest R15T. */
3284 /* str r-dstGA, amR15T */
3285 p = do_load_or_store32(p, False/*!isLoad*/,
3286 iregNo(i->ARMin.XIndir.dstGA),
3287 i->ARMin.XIndir.amR15T);
3288
3289 /* movw r12, lo16(VG_(disp_cp_xindir)) */
3290 /* movt r12, hi16(VG_(disp_cp_xindir)) */
3291 /* bx r12 (A1) */
3292 p = imm32_to_iregNo(p, /*r*/12,
3293 (UInt)Ptr_to_ULong(disp_cp_xindir));
3294 *p++ = 0xE12FFF1C;
3295
3296 /* Fix up the conditional jump, if there was one. */
3297 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3298 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3299 vassert(delta > 0 && delta < 40);
3300 vassert((delta & 3) == 0);
3301 UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3302 vassert(notCond <= 13); /* Neither AL nor NV */
3303 delta = (delta >> 2) - 2;
3304 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3305 }
3306 goto done;
3307 }
3308
3309 case ARMin_XAssisted: {
3310 /* Use ptmp for backpatching conditional jumps. */
3311 UInt* ptmp = NULL;
3312
3313 /* First off, if this is conditional, create a conditional
3314 jump over the rest of it. Or at least, leave a space for
3315 it that we will shortly fill in. */
3316 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3317 vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3318 ptmp = p;
3319 *p++ = 0;
3320 }
3321
3322 /* Update the guest R15T. */
3323 /* str r-dstGA, amR15T */
3324 p = do_load_or_store32(p, False/*!isLoad*/,
3325 iregNo(i->ARMin.XAssisted.dstGA),
3326 i->ARMin.XAssisted.amR15T);
3327
3328 /* movw r8, $magic_number */
3329 UInt trcval = 0;
3330 switch (i->ARMin.XAssisted.jk) {
3331 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3332 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3333 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3334 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3335 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3336 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3337 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3338 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3339 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3340 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3341 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3342 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3343 /* We don't expect to see the following being assisted. */
3344 //case Ijk_Ret:
3345 //case Ijk_Call:
3346 /* fallthrough */
3347 default:
3348 ppIRJumpKind(i->ARMin.XAssisted.jk);
3349 vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3350 }
3351 vassert(trcval != 0);
3352 p = imm32_to_iregNo(p, /*r*/8, trcval);
3353
3354 /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3355 /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3356 /* bx r12 (A1) */
3357 p = imm32_to_iregNo(p, /*r*/12,
3358 (UInt)Ptr_to_ULong(disp_cp_xassisted));
3359 *p++ = 0xE12FFF1C;
3360
3361 /* Fix up the conditional jump, if there was one. */
3362 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3363 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3364 vassert(delta > 0 && delta < 40);
3365 vassert((delta & 3) == 0);
3366 UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3367 vassert(notCond <= 13); /* Neither AL nor NV */
3368 delta = (delta >> 2) - 2;
3369 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3370 }
3371 goto done;
3372 }
3373
3374 case ARMin_CMov: {
3375 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
3376 UInt subopc = X1101; /* MOV */
3377 UInt SBZ = 0;
3378 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3379 (subopc << 1) & 0xF, SBZ,
3380 iregNo(i->ARMin.CMov.dst));
3381 *p++ = instr;
3382 goto done;
3383 }
3384
3385 case ARMin_Call: {
3386 UInt instr;
3387       /* Decide on a scratch reg used to hold the call address.
3388 This has to be done as per the comments in getRegUsage. */
3389 Int scratchNo;
3390 switch (i->ARMin.Call.nArgRegs) {
3391 case 0: scratchNo = 0; break;
3392 case 1: scratchNo = 1; break;
3393 case 2: scratchNo = 2; break;
3394 case 3: scratchNo = 3; break;
3395 case 4: scratchNo = 11; break;
3396 default: vassert(0);
3397 }
3398 /* If we don't need to do any fixup actions in the case that
3399 the call doesn't happen, just do the simple thing and emit
3400 straight-line code. We hope this is the common case. */
3401 if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3402 || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3403 // r"scratchNo" = &target
3404 p = imm32_to_iregNo( (UInt*)p,
3405 scratchNo, (UInt)i->ARMin.Call.target );
3406 // blx{cond} r"scratchNo"
3407 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3408 X0011, scratchNo);
3409 instr |= 0xFFF << 8; // stick in the SBOnes
3410 *p++ = instr;
3411 } else {
3412 Int delta;
3413 /* Complex case. We have to generate an if-then-else
3414 diamond. */
3415 // before:
3416 // b{!cond} else:
3417 // r"scratchNo" = &target
3418 // blx{AL} r"scratchNo"
3419 // preElse:
3420 // b after:
3421 // else:
3422 // mov r0, #0x55555555 // possibly
3423 // mov r1, r0 // possibly
3424 // after:
3425
3426 // before:
3427 UInt* pBefore = p;
3428
3429 // b{!cond} else: // ptmp1 points here
3430 *p++ = 0; // filled in later
3431
3432 // r"scratchNo" = &target
3433 p = imm32_to_iregNo( (UInt*)p,
3434 scratchNo, (UInt)i->ARMin.Call.target );
3435
3436 // blx{AL} r"scratchNo"
3437 instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3438 X0011, scratchNo);
3439 instr |= 0xFFF << 8; // stick in the SBOnes
3440 *p++ = instr;
3441
3442 // preElse:
3443 UInt* pPreElse = p;
3444
3445 // b after:
3446 *p++ = 0; // filled in later
3447
3448 // else:
3449 delta = (UChar*)p - (UChar*)pBefore;
3450 delta = (delta >> 2) - 2;
3451 *pBefore
3452 = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3453
3454 /* Do the 'else' actions */
3455 switch (i->ARMin.Call.rloc.pri) {
3456 case RLPri_Int:
3457 p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
3458 break;
3459 case RLPri_2Int:
3460 vassert(0); //ATC
3461 p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
3462 /* mov r1, r0 */
3463 *p++ = 0xE1A01000;
3464 break;
3465 case RLPri_None: case RLPri_INVALID: default:
3466 vassert(0);
3467 }
3468
3469 // after:
3470 delta = (UChar*)p - (UChar*)pPreElse;
3471 delta = (delta >> 2) - 2;
3472 *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3473 }
3474
3475 goto done;
3476 }
3477
3478 case ARMin_Mul: {
3479 /* E0000392 mul r0, r2, r3
3480 E0810392 umull r0(LO), r1(HI), r2, r3
3481 E0C10392 smull r0(LO), r1(HI), r2, r3
3482 */
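         /* The encodings are compile-time constants because the
            operands are evidently pinned to r0..r3 by the
            instruction selector (cf. the getRegUsage comments noted
            in ARMin_Call above); only the opcode bits differ. */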
3483 switch (i->ARMin.Mul.op) {
3484 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3485 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
3486 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
3487 default: vassert(0);
3488 }
3489 goto bad;
3490 }
3491 case ARMin_LdrEX: {
3492 /* E1D42F9F ldrexb r2, [r4]
3493 E1F42F9F ldrexh r2, [r4]
3494 E1942F9F ldrex r2, [r4]
3495 E1B42F9F ldrexd r2, r3, [r4]
3496 */
3497 switch (i->ARMin.LdrEX.szB) {
3498 case 1: *p++ = 0xE1D42F9F; goto done;
3499 case 2: *p++ = 0xE1F42F9F; goto done;
3500 case 4: *p++ = 0xE1942F9F; goto done;
3501 case 8: *p++ = 0xE1B42F9F; goto done;
3502 default: break;
3503 }
3504 goto bad;
3505 }
3506 case ARMin_StrEX: {
3507 /* E1C40F92 strexb r0, r2, [r4]
3508 E1E40F92 strexh r0, r2, [r4]
3509 E1840F92 strex r0, r2, [r4]
3510 E1A40F92 strexd r0, r2, r3, [r4]
3511 */
3512 switch (i->ARMin.StrEX.szB) {
3513 case 1: *p++ = 0xE1C40F92; goto done;
3514 case 2: *p++ = 0xE1E40F92; goto done;
3515 case 4: *p++ = 0xE1840F92; goto done;
3516 case 8: *p++ = 0xE1A40F92; goto done;
3517 default: break;
3518 }
3519 goto bad;
3520 }
3521 case ARMin_VLdStD: {
3522 UInt dD = dregNo(i->ARMin.VLdStD.dD);
3523 UInt rN = iregNo(i->ARMin.VLdStD.amode->reg);
3524 Int simm11 = i->ARMin.VLdStD.amode->simm11;
3525 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3526 UInt bU = simm11 >= 0 ? 1 : 0;
3527 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
3528 UInt insn;
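         /* VLDR/VSTR encode the offset in words, so simm11 must be
            4-aligned and, once scaled, must fit in 8 bits -- hence
            the two assertions below. */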
3529 vassert(0 == (off8 & 3));
3530 off8 >>= 2;
3531 vassert(0 == (off8 & 0xFFFFFF00));
3532 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3533 insn |= off8;
3534 *p++ = insn;
3535 goto done;
3536 }
3537 case ARMin_VLdStS: {
3538 UInt fD = fregNo(i->ARMin.VLdStS.fD);
3539 UInt rN = iregNo(i->ARMin.VLdStS.amode->reg);
3540 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3541 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3542 UInt bU = simm11 >= 0 ? 1 : 0;
3543 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3544 UInt bD = fD & 1;
3545 UInt insn;
3546 vassert(0 == (off8 & 3));
3547 off8 >>= 2;
3548 vassert(0 == (off8 & 0xFFFFFF00));
3549 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3550 insn |= off8;
3551 *p++ = insn;
3552 goto done;
3553 }
3554 case ARMin_VAluD: {
3555 UInt dN = dregNo(i->ARMin.VAluD.argL);
3556 UInt dD = dregNo(i->ARMin.VAluD.dst);
3557 UInt dM = dregNo(i->ARMin.VAluD.argR);
3558 UInt pqrs = X1111; /* undefined */
3559 switch (i->ARMin.VAluD.op) {
3560 case ARMvfp_ADD: pqrs = X0110; break;
3561 case ARMvfp_SUB: pqrs = X0111; break;
3562 case ARMvfp_MUL: pqrs = X0100; break;
3563 case ARMvfp_DIV: pqrs = X1000; break;
3564 default: goto bad;
3565 }
3566 vassert(pqrs != X1111);
3567 UInt bP = (pqrs >> 3) & 1;
3568 UInt bQ = (pqrs >> 2) & 1;
3569 UInt bR = (pqrs >> 1) & 1;
3570 UInt bS = (pqrs >> 0) & 1;
3571 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3572 X1011, BITS4(0,bS,0,0), dM);
3573 *p++ = insn;
3574 goto done;
3575 }
3576 case ARMin_VAluS: {
3577 UInt dN = fregNo(i->ARMin.VAluS.argL);
3578 UInt dD = fregNo(i->ARMin.VAluS.dst);
3579 UInt dM = fregNo(i->ARMin.VAluS.argR);
3580 UInt bN = dN & 1;
3581 UInt bD = dD & 1;
3582 UInt bM = dM & 1;
3583 UInt pqrs = X1111; /* undefined */
3584 switch (i->ARMin.VAluS.op) {
3585 case ARMvfp_ADD: pqrs = X0110; break;
3586 case ARMvfp_SUB: pqrs = X0111; break;
3587 case ARMvfp_MUL: pqrs = X0100; break;
3588 case ARMvfp_DIV: pqrs = X1000; break;
3589 default: goto bad;
3590 }
3591 vassert(pqrs != X1111);
3592 UInt bP = (pqrs >> 3) & 1;
3593 UInt bQ = (pqrs >> 2) & 1;
3594 UInt bR = (pqrs >> 1) & 1;
3595 UInt bS = (pqrs >> 0) & 1;
3596 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3597 (dN >> 1), (dD >> 1),
3598 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3599 *p++ = insn;
3600 goto done;
3601 }
3602 case ARMin_VUnaryD: {
3603 UInt dD = dregNo(i->ARMin.VUnaryD.dst);
3604 UInt dM = dregNo(i->ARMin.VUnaryD.src);
3605 UInt insn = 0;
3606 switch (i->ARMin.VUnaryD.op) {
3607 case ARMvfpu_COPY:
3608 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3609 break;
3610 case ARMvfpu_ABS:
3611 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3612 break;
3613 case ARMvfpu_NEG:
3614 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3615 break;
3616 case ARMvfpu_SQRT:
3617 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3618 break;
3619 default:
3620 goto bad;
3621 }
3622 *p++ = insn;
3623 goto done;
3624 }
3625 case ARMin_VUnaryS: {
3626 UInt fD = fregNo(i->ARMin.VUnaryS.dst);
3627 UInt fM = fregNo(i->ARMin.VUnaryS.src);
3628 UInt insn = 0;
3629 switch (i->ARMin.VUnaryS.op) {
3630 case ARMvfpu_COPY:
3631 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3632 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3633 (fM >> 1));
3634 break;
3635 case ARMvfpu_ABS:
3636 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3637 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3638 (fM >> 1));
3639 break;
3640 case ARMvfpu_NEG:
3641 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3642 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3643 (fM >> 1));
3644 break;
3645 case ARMvfpu_SQRT:
3646 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3647 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3648 (fM >> 1));
3649 break;
3650 default:
3651 goto bad;
3652 }
3653 *p++ = insn;
3654 goto done;
3655 }
3656 case ARMin_VCmpD: {
3657 UInt dD = dregNo(i->ARMin.VCmpD.argL);
3658 UInt dM = dregNo(i->ARMin.VCmpD.argR);
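         /* Sanity check for the encoding below: with dD == 0 and
            dM == 1 the nibbles assemble to 0xEEB40B41, which (if
            I've read the ARM ARM right) is vcmp.f64 d0, d1. */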
3659 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3660 *p++ = insn; /* FCMPD dD, dM */
3661 *p++ = 0xEEF1FA10; /* FMSTAT */
3662 goto done;
3663 }
3664 case ARMin_VCMovD: {
3665 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3666 UInt dD = dregNo(i->ARMin.VCMovD.dst);
3667 UInt dM = dregNo(i->ARMin.VCMovD.src);
3668 vassert(cc < 16 && cc != ARMcc_AL);
3669 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3670 *p++ = insn;
3671 goto done;
3672 }
3673 case ARMin_VCMovS: {
3674 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3675 UInt fD = fregNo(i->ARMin.VCMovS.dst);
3676 UInt fM = fregNo(i->ARMin.VCMovS.src);
3677 vassert(cc < 16 && cc != ARMcc_AL);
3678 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3679 X0000,(fD >> 1),X1010,
3680 BITS4(0,1,(fM & 1),0), (fM >> 1));
3681 *p++ = insn;
3682 goto done;
3683 }
3684 case ARMin_VCvtSD: {
3685 if (i->ARMin.VCvtSD.sToD) {
3686 UInt dD = dregNo(i->ARMin.VCvtSD.dst);
3687 UInt fM = fregNo(i->ARMin.VCvtSD.src);
3688 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3689 BITS4(1,1, (fM & 1), 0),
3690 (fM >> 1));
3691 *p++ = insn;
3692 goto done;
3693 } else {
3694 UInt fD = fregNo(i->ARMin.VCvtSD.dst);
3695 UInt dM = dregNo(i->ARMin.VCvtSD.src);
3696 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3697 X0111, (fD >> 1),
3698 X1011, X1100, dM);
3699 *p++ = insn;
3700 goto done;
3701 }
3702 }
3703 case ARMin_VXferD: {
3704 UInt dD = dregNo(i->ARMin.VXferD.dD);
3705 UInt rHi = iregNo(i->ARMin.VXferD.rHi);
3706 UInt rLo = iregNo(i->ARMin.VXferD.rLo);
3707 /* vmov dD, rLo, rHi is
3708 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3709 vmov rLo, rHi, dD is
3710 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3711 */
3712 UInt insn
3713 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3714 rHi, rLo, 0xB,
3715 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3716 *p++ = insn;
3717 goto done;
3718 }
3719 case ARMin_VXferS: {
3720 UInt fD = fregNo(i->ARMin.VXferS.fD);
3721 UInt rLo = iregNo(i->ARMin.VXferS.rLo);
3722 /* vmov fD, rLo is
3723 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3724 vmov rLo, fD is
3725 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3726 */
3727 UInt insn
3728 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3729 (fD >> 1) & 0xF, rLo, 0xA,
3730 BITS4((fD & 1),0,0,1), 0);
3731 *p++ = insn;
3732 goto done;
3733 }
3734 case ARMin_VCvtID: {
3735 Bool iToD = i->ARMin.VCvtID.iToD;
3736 Bool syned = i->ARMin.VCvtID.syned;
3737 if (iToD && syned) {
3738 // FSITOD: I32S-in-freg to F64-in-dreg
3739 UInt regF = fregNo(i->ARMin.VCvtID.src);
3740 UInt regD = dregNo(i->ARMin.VCvtID.dst);
3741 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3742 X1011, BITS4(1,1,(regF & 1),0),
3743 (regF >> 1) & 0xF);
3744 *p++ = insn;
3745 goto done;
3746 }
3747 if (iToD && (!syned)) {
3748 // FUITOD: I32U-in-freg to F64-in-dreg
3749 UInt regF = fregNo(i->ARMin.VCvtID.src);
3750 UInt regD = dregNo(i->ARMin.VCvtID.dst);
3751 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3752 X1011, BITS4(0,1,(regF & 1),0),
3753 (regF >> 1) & 0xF);
3754 *p++ = insn;
3755 goto done;
3756 }
3757 if ((!iToD) && syned) {
3758 // FTOSID: F64-in-dreg to I32S-in-freg
3759 UInt regD = dregNo(i->ARMin.VCvtID.src);
3760 UInt regF = fregNo(i->ARMin.VCvtID.dst);
3761 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3762 X1101, (regF >> 1) & 0xF,
3763 X1011, X0100, regD);
3764 *p++ = insn;
3765 goto done;
3766 }
3767 if ((!iToD) && (!syned)) {
3768 // FTOUID: F64-in-dreg to I32U-in-freg
3769 UInt regD = dregNo(i->ARMin.VCvtID.src);
3770 UInt regF = fregNo(i->ARMin.VCvtID.dst);
3771 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3772 X1100, (regF >> 1) & 0xF,
3773 X1011, X0100, regD);
3774 *p++ = insn;
3775 goto done;
3776 }
3777 /*UNREACHED*/
3778 vassert(0);
3779 }
3780 case ARMin_FPSCR: {
3781 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3782 UInt iReg = iregNo(i->ARMin.FPSCR.iReg);
3783 if (toFPSCR) {
3784 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3785 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3786 goto done;
3787 }
3788 goto bad; // FPSCR -> iReg case currently ATC
3789 }
3790 case ARMin_MFence: {
3791          // It's not clear (to me) how the older MCR-based barriers
3792          // below relate to the ARMv7 versions, so just use the v7
3793          // versions, since they are at least well documented.
3794 //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3795 //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3796 //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3797 *p++ = 0xF57FF04F; /* DSB sy */
3798 *p++ = 0xF57FF05F; /* DMB sy */
3799 *p++ = 0xF57FF06F; /* ISB */
3800 goto done;
3801 }
3802 case ARMin_CLREX: {
3803 *p++ = 0xF57FF01F; /* clrex */
3804 goto done;
3805 }
3806
3807 case ARMin_NLdStQ: {
3808 UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
3809 UInt regN, regM;
3810 UInt D = regD >> 4;
3811 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3812 UInt insn;
3813 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3814 regD &= 0xF;
3815 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3816 regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3817 regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3818 } else {
3819 regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3820 regM = 15;
3821 }
3822 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3823 regN, regD, X1010, X1000, regM);
3824 *p++ = insn;
3825 goto done;
3826 }
3827 case ARMin_NLdStD: {
3828 UInt regD = dregNo(i->ARMin.NLdStD.dD);
3829 UInt regN, regM;
3830 UInt D = regD >> 4;
3831 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3832 UInt insn;
3833 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3834 regD &= 0xF;
3835 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3836 regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3837 regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3838 } else {
3839 regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3840 regM = 15;
3841 }
3842 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3843 regN, regD, X0111, X1000, regM);
3844 *p++ = insn;
3845 goto done;
3846 }
3847 case ARMin_NUnaryS: {
3848 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3849 UInt regD, D;
3850 UInt regM, M;
3851 UInt size = i->ARMin.NUnaryS.size;
3852 UInt insn;
3853 UInt opc, opc1, opc2;
3854 switch (i->ARMin.NUnaryS.op) {
3855 case ARMneon_VDUP:
3856 if (i->ARMin.NUnaryS.size >= 16)
3857 goto bad;
3858 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3859 goto bad;
3860 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3861 goto bad;
3862 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3863 ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
3864 : dregNo(i->ARMin.NUnaryS.dst->reg);
3865 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3866 ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
3867 : dregNo(i->ARMin.NUnaryS.src->reg);
3868 D = regD >> 4;
3869 M = regM >> 4;
3870 regD &= 0xf;
3871 regM &= 0xf;
3872 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3873 (i->ARMin.NUnaryS.size & 0xf), regD,
3874 X1100, BITS4(0,Q,M,0), regM);
3875 *p++ = insn;
3876 goto done;
3877 case ARMneon_SETELEM:
3878 regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
3879 dregNo(i->ARMin.NUnaryS.dst->reg);
3880 regM = iregNo(i->ARMin.NUnaryS.src->reg);
3881 M = regM >> 4;
3882 D = regD >> 4;
3883 regM &= 0xF;
3884 regD &= 0xF;
3885 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3886 goto bad;
3887 switch (size) {
3888 case 0:
3889 if (i->ARMin.NUnaryS.dst->index > 7)
3890 goto bad;
3891 opc = X1000 | i->ARMin.NUnaryS.dst->index;
3892 break;
3893 case 1:
3894 if (i->ARMin.NUnaryS.dst->index > 3)
3895 goto bad;
3896 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3897 break;
3898 case 2:
3899 if (i->ARMin.NUnaryS.dst->index > 1)
3900 goto bad;
3901 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3902 break;
3903 default:
3904 goto bad;
3905 }
3906 opc1 = (opc >> 2) & 3;
3907 opc2 = opc & 3;
3908 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3909 regD, regM, X1011,
3910 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3911 *p++ = insn;
3912 goto done;
3913 case ARMneon_GETELEMU:
3914 regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3915 dregNo(i->ARMin.NUnaryS.src->reg);
3916 regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3917 M = regM >> 4;
3918 D = regD >> 4;
3919 regM &= 0xF;
3920 regD &= 0xF;
3921 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3922 goto bad;
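            /* A scalar index that refers to the high half of a Q reg
               is re-expressed below as an index into the odd D
               register of the pair, since these scalar forms can only
               address D regs.  (GETELEMS below uses the same trick.) */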
3923 switch (size) {
3924 case 0:
3925 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3926 regM++;
3927 i->ARMin.NUnaryS.src->index -= 8;
3928 }
3929 if (i->ARMin.NUnaryS.src->index > 7)
3930 goto bad;
3931 opc = X1000 | i->ARMin.NUnaryS.src->index;
3932 break;
3933 case 1:
3934 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3935 regM++;
3936 i->ARMin.NUnaryS.src->index -= 4;
3937 }
3938 if (i->ARMin.NUnaryS.src->index > 3)
3939 goto bad;
3940 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3941 break;
3942 case 2:
3943 goto bad;
3944 default:
3945 goto bad;
3946 }
3947 opc1 = (opc >> 2) & 3;
3948 opc2 = opc & 3;
3949 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3950 regM, regD, X1011,
3951 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3952 *p++ = insn;
3953 goto done;
3954 case ARMneon_GETELEMS:
3955 regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3956 dregNo(i->ARMin.NUnaryS.src->reg);
3957 regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3958 M = regM >> 4;
3959 D = regD >> 4;
3960 regM &= 0xF;
3961 regD &= 0xF;
3962 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3963 goto bad;
3964 switch (size) {
3965 case 0:
3966 if (Q && i->ARMin.NUnaryS.src->index > 7) {
3967 regM++;
3968 i->ARMin.NUnaryS.src->index -= 8;
3969 }
3970 if (i->ARMin.NUnaryS.src->index > 7)
3971 goto bad;
3972 opc = X1000 | i->ARMin.NUnaryS.src->index;
3973 break;
3974 case 1:
3975 if (Q && i->ARMin.NUnaryS.src->index > 3) {
3976 regM++;
3977 i->ARMin.NUnaryS.src->index -= 4;
3978 }
3979 if (i->ARMin.NUnaryS.src->index > 3)
3980 goto bad;
3981 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3982 break;
3983 case 2:
3984 if (Q && i->ARMin.NUnaryS.src->index > 1) {
3985 regM++;
3986 i->ARMin.NUnaryS.src->index -= 2;
3987 }
3988 if (i->ARMin.NUnaryS.src->index > 1)
3989 goto bad;
3990 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3991 break;
3992 default:
3993 goto bad;
3994 }
3995 opc1 = (opc >> 2) & 3;
3996 opc2 = opc & 3;
3997 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3998 regM, regD, X1011,
3999 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4000 *p++ = insn;
4001 goto done;
4002 default:
4003 goto bad;
4004 }
4005 }
4006 case ARMin_NUnary: {
4007 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
4008 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
4009 ? (qregNo(i->ARMin.NUnary.dst) << 1)
4010 : dregNo(i->ARMin.NUnary.dst);
4011 UInt regM, M;
4012 UInt D = regD >> 4;
4013 UInt sz1 = i->ARMin.NUnary.size >> 1;
4014 UInt sz2 = i->ARMin.NUnary.size & 1;
4015 UInt sz = i->ARMin.NUnary.size;
4016 UInt insn;
4017 UInt F = 0; /* TODO: floating point EQZ ??? */
4018 if (i->ARMin.NUnary.op != ARMneon_DUP) {
4019 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4020 ? (qregNo(i->ARMin.NUnary.src) << 1)
4021 : dregNo(i->ARMin.NUnary.src);
4022 M = regM >> 4;
4023 } else {
4024 regM = iregNo(i->ARMin.NUnary.src);
4025 M = regM >> 4;
4026 }
4027 regD &= 0xF;
4028 regM &= 0xF;
4029 switch (i->ARMin.NUnary.op) {
4030 case ARMneon_COPY: /* VMOV reg, reg */
4031 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4032 BITS4(M,Q,M,1), regM);
4033 break;
4034 case ARMneon_COPYN: /* VMOVN regD, regQ */
4035 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4036 regD, X0010, BITS4(0,0,M,0), regM);
4037 break;
4038 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4039 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4040 regD, X0010, BITS4(1,0,M,0), regM);
4041 break;
4042 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4043 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4044 regD, X0010, BITS4(0,1,M,0), regM);
4045 break;
4046 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4047 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4048 regD, X0010, BITS4(1,1,M,0), regM);
4049 break;
4050 case ARMneon_COPYLS: /* VMOVL regQ, regD */
4051 if (sz >= 3)
4052 goto bad;
4053 insn = XXXXXXXX(0xF, X0010,
4054 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4055 BITS4((sz == 0) ? 1 : 0,0,0,0),
4056 regD, X1010, BITS4(0,0,M,1), regM);
4057 break;
4058 case ARMneon_COPYLU: /* VMOVL regQ, regD */
4059 if (sz >= 3)
4060 goto bad;
4061 insn = XXXXXXXX(0xF, X0011,
4062 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4063 BITS4((sz == 0) ? 1 : 0,0,0,0),
4064 regD, X1010, BITS4(0,0,M,1), regM);
4065 break;
4066 case ARMneon_NOT: /* VMVN reg, reg*/
4067 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4068 BITS4(1,Q,M,0), regM);
4069 break;
4070 case ARMneon_EQZ:
4071 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4072 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4073 break;
4074 case ARMneon_CNT:
4075 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4076 BITS4(0,Q,M,0), regM);
4077 break;
4078 case ARMneon_CLZ:
4079 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4080 regD, X0100, BITS4(1,Q,M,0), regM);
4081 break;
4082 case ARMneon_CLS:
4083 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4084 regD, X0100, BITS4(0,Q,M,0), regM);
4085 break;
4086 case ARMneon_ABS:
4087 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4088 regD, X0011, BITS4(0,Q,M,0), regM);
4089 break;
4090 case ARMneon_DUP:
4091 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4092 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4093 vassert(sz1 + sz2 < 2);
4094 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4095 X1011, BITS4(D,0,sz2,1), X0000);
4096 break;
4097 case ARMneon_REV16:
4098 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4099 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4100 break;
4101 case ARMneon_REV32:
4102 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4103 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4104 break;
4105 case ARMneon_REV64:
4106 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4107 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4108 break;
4109 case ARMneon_PADDLU:
4110 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4111 regD, X0010, BITS4(1,Q,M,0), regM);
4112 break;
4113 case ARMneon_PADDLS:
4114 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4115 regD, X0010, BITS4(0,Q,M,0), regM);
4116 break;
4117 case ARMneon_VQSHLNUU:
4118 insn = XXXXXXXX(0xF, X0011,
4119 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4120 sz & 0xf, regD, X0111,
4121 BITS4(sz >> 6,Q,M,1), regM);
4122 break;
4123 case ARMneon_VQSHLNSS:
4124 insn = XXXXXXXX(0xF, X0010,
4125 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4126 sz & 0xf, regD, X0111,
4127 BITS4(sz >> 6,Q,M,1), regM);
4128 break;
4129 case ARMneon_VQSHLNUS:
4130 insn = XXXXXXXX(0xF, X0011,
4131 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4132 sz & 0xf, regD, X0110,
4133 BITS4(sz >> 6,Q,M,1), regM);
4134 break;
4135 case ARMneon_VCVTFtoS:
4136 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4137 BITS4(0,Q,M,0), regM);
4138 break;
4139 case ARMneon_VCVTFtoU:
4140 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4141 BITS4(1,Q,M,0), regM);
4142 break;
4143 case ARMneon_VCVTStoF:
4144 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4145 BITS4(0,Q,M,0), regM);
4146 break;
4147 case ARMneon_VCVTUtoF:
4148 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4149 BITS4(1,Q,M,0), regM);
4150 break;
4151 case ARMneon_VCVTFtoFixedU:
4152 sz1 = (sz >> 5) & 1;
4153 sz2 = (sz >> 4) & 1;
4154 sz &= 0xf;
4155 insn = XXXXXXXX(0xF, X0011,
4156 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4157 BITS4(0,Q,M,1), regM);
4158 break;
4159 case ARMneon_VCVTFtoFixedS:
4160 sz1 = (sz >> 5) & 1;
4161 sz2 = (sz >> 4) & 1;
4162 sz &= 0xf;
4163 insn = XXXXXXXX(0xF, X0010,
4164 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4165 BITS4(0,Q,M,1), regM);
4166 break;
4167 case ARMneon_VCVTFixedUtoF:
4168 sz1 = (sz >> 5) & 1;
4169 sz2 = (sz >> 4) & 1;
4170 sz &= 0xf;
4171 insn = XXXXXXXX(0xF, X0011,
4172 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4173 BITS4(0,Q,M,1), regM);
4174 break;
4175 case ARMneon_VCVTFixedStoF:
4176 sz1 = (sz >> 5) & 1;
4177 sz2 = (sz >> 4) & 1;
4178 sz &= 0xf;
4179 insn = XXXXXXXX(0xF, X0010,
4180 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4181 BITS4(0,Q,M,1), regM);
4182 break;
4183 case ARMneon_VCVTF32toF16:
4184 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4185 BITS4(0,0,M,0), regM);
4186 break;
4187 case ARMneon_VCVTF16toF32:
4188 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4189 BITS4(0,0,M,0), regM);
4190 break;
4191 case ARMneon_VRECIP:
4192 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4193 BITS4(0,Q,M,0), regM);
4194 break;
4195 case ARMneon_VRECIPF:
4196 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4197 BITS4(0,Q,M,0), regM);
4198 break;
4199 case ARMneon_VABSFP:
4200 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4201 BITS4(0,Q,M,0), regM);
4202 break;
4203 case ARMneon_VRSQRTEFP:
4204 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4205 BITS4(1,Q,M,0), regM);
4206 break;
4207 case ARMneon_VRSQRTE:
4208 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4209 BITS4(1,Q,M,0), regM);
4210 break;
4211 case ARMneon_VNEGF:
4212 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4213 BITS4(1,Q,M,0), regM);
4214 break;
4215
4216 default:
4217 goto bad;
4218 }
4219 *p++ = insn;
4220 goto done;
4221 }
4222 case ARMin_NDual: {
4223 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4224 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4225 ? (qregNo(i->ARMin.NDual.arg1) << 1)
4226 : dregNo(i->ARMin.NDual.arg1);
4227 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4228 ? (qregNo(i->ARMin.NDual.arg2) << 1)
4229 : dregNo(i->ARMin.NDual.arg2);
4230 UInt D = regD >> 4;
4231 UInt M = regM >> 4;
4232 UInt sz1 = i->ARMin.NDual.size >> 1;
4233 UInt sz2 = i->ARMin.NDual.size & 1;
4234 UInt insn;
4235 regD &= 0xF;
4236 regM &= 0xF;
4237 switch (i->ARMin.NDual.op) {
4238 case ARMneon_TRN: /* VTRN reg, reg */
4239 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4240 regD, X0000, BITS4(1,Q,M,0), regM);
4241 break;
4242 case ARMneon_ZIP: /* VZIP reg, reg */
4243 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4244 regD, X0001, BITS4(1,Q,M,0), regM);
4245 break;
4246 case ARMneon_UZP: /* VUZP reg, reg */
4247 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4248 regD, X0001, BITS4(0,Q,M,0), regM);
4249 break;
4250 default:
4251 goto bad;
4252 }
4253 *p++ = insn;
4254 goto done;
4255 }
4256 case ARMin_NBinary: {
4257 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4258 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4259 ? (qregNo(i->ARMin.NBinary.dst) << 1)
4260 : dregNo(i->ARMin.NBinary.dst);
4261 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4262 ? (qregNo(i->ARMin.NBinary.argL) << 1)
4263 : dregNo(i->ARMin.NBinary.argL);
4264 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4265 ? (qregNo(i->ARMin.NBinary.argR) << 1)
4266 : dregNo(i->ARMin.NBinary.argR);
4267 UInt sz1 = i->ARMin.NBinary.size >> 1;
4268 UInt sz2 = i->ARMin.NBinary.size & 1;
4269 UInt D = regD >> 4;
4270 UInt N = regN >> 4;
4271 UInt M = regM >> 4;
4272 UInt insn;
4273 regD &= 0xF;
4274 regM &= 0xF;
4275 regN &= 0xF;
4276 switch (i->ARMin.NBinary.op) {
4277 case ARMneon_VAND: /* VAND reg, reg, reg */
4278 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4279 BITS4(N,Q,M,1), regM);
4280 break;
4281 case ARMneon_VORR: /* VORR reg, reg, reg*/
4282 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4283 BITS4(N,Q,M,1), regM);
4284 break;
4285 case ARMneon_VXOR: /* VEOR reg, reg, reg */
4286 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4287 BITS4(N,Q,M,1), regM);
4288 break;
4289 case ARMneon_VADD: /* VADD reg, reg, reg */
4290 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4291 X1000, BITS4(N,Q,M,0), regM);
4292 break;
4293 case ARMneon_VSUB: /* VSUB reg, reg, reg */
4294 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4295 X1000, BITS4(N,Q,M,0), regM);
4296 break;
4297 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4298 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4299 X0110, BITS4(N,Q,M,1), regM);
4300 break;
4301 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4302 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4303 X0110, BITS4(N,Q,M,1), regM);
4304 break;
4305 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4306 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4307 X0110, BITS4(N,Q,M,0), regM);
4308 break;
4309 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4310 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4311 X0110, BITS4(N,Q,M,0), regM);
4312 break;
4313 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4314 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4315 X0001, BITS4(N,Q,M,0), regM);
4316 break;
4317 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4318 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4319 X0001, BITS4(N,Q,M,0), regM);
4320 break;
4321 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4322 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4323 X0000, BITS4(N,Q,M,1), regM);
4324 break;
4325 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4326 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4327 X0000, BITS4(N,Q,M,1), regM);
4328 break;
4329 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4330 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4331 X0010, BITS4(N,Q,M,1), regM);
4332 break;
4333 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4334 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4335 X0010, BITS4(N,Q,M,1), regM);
4336 break;
4337 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4338 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4339 X0011, BITS4(N,Q,M,0), regM);
4340 break;
4341 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4342 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4343 X0011, BITS4(N,Q,M,0), regM);
4344 break;
4345 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4346 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4347 X0011, BITS4(N,Q,M,1), regM);
4348 break;
4349 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4350 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4351 X0011, BITS4(N,Q,M,1), regM);
4352 break;
4353 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4354 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4355 X1000, BITS4(N,Q,M,1), regM);
4356 break;
4357 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4358 if (i->ARMin.NBinary.size >= 16)
4359 goto bad;
4360 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4361 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4362 regM);
4363 break;
4364 case ARMneon_VMUL:
4365 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4366 X1001, BITS4(N,Q,M,1), regM);
4367 break;
4368 case ARMneon_VMULLU:
4369 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4370 X1100, BITS4(N,0,M,0), regM);
4371 break;
4372 case ARMneon_VMULLS:
4373 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4374 X1100, BITS4(N,0,M,0), regM);
4375 break;
4376 case ARMneon_VMULP:
4377 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4378 X1001, BITS4(N,Q,M,1), regM);
4379 break;
4380 case ARMneon_VMULFP:
4381 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4382 X1101, BITS4(N,Q,M,1), regM);
4383 break;
4384 case ARMneon_VMULLP:
4385 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4386 X1110, BITS4(N,0,M,0), regM);
4387 break;
4388 case ARMneon_VQDMULH:
4389 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4390 X1011, BITS4(N,Q,M,0), regM);
4391 break;
4392 case ARMneon_VQRDMULH:
4393 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4394 X1011, BITS4(N,Q,M,0), regM);
4395 break;
4396 case ARMneon_VQDMULL:
4397 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4398 X1101, BITS4(N,0,M,0), regM);
4399 break;
4400 case ARMneon_VTBL:
4401 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4402 X1000, BITS4(N,0,M,0), regM);
4403 break;
4404 case ARMneon_VPADD:
4405 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4406 X1011, BITS4(N,Q,M,1), regM);
4407 break;
4408 case ARMneon_VPADDFP:
4409 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4410 X1101, BITS4(N,Q,M,0), regM);
4411 break;
4412 case ARMneon_VPMINU:
4413 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4414 X1010, BITS4(N,Q,M,1), regM);
4415 break;
4416 case ARMneon_VPMINS:
4417 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4418 X1010, BITS4(N,Q,M,1), regM);
4419 break;
4420 case ARMneon_VPMAXU:
4421 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4422 X1010, BITS4(N,Q,M,0), regM);
4423 break;
4424 case ARMneon_VPMAXS:
4425 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4426 X1010, BITS4(N,Q,M,0), regM);
4427 break;
4428 case ARMneon_VADDFP: /* VADD reg, reg, reg */
4429 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4430 X1101, BITS4(N,Q,M,0), regM);
4431 break;
4432             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4433 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4434 X1101, BITS4(N,Q,M,0), regM);
4435 break;
4436 case ARMneon_VABDFP: /* VABD reg, reg, reg */
4437 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4438 X1101, BITS4(N,Q,M,0), regM);
4439 break;
4440 case ARMneon_VMINF:
4441 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4442 X1111, BITS4(N,Q,M,0), regM);
4443 break;
4444 case ARMneon_VMAXF:
4445 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4446 X1111, BITS4(N,Q,M,0), regM);
4447 break;
4448 case ARMneon_VPMINF:
4449 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4450 X1111, BITS4(N,Q,M,0), regM);
4451 break;
4452 case ARMneon_VPMAXF:
4453 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4454 X1111, BITS4(N,Q,M,0), regM);
4455 break;
4456 case ARMneon_VRECPS:
4457 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4458 BITS4(N,Q,M,1), regM);
4459 break;
4460 case ARMneon_VCGTF:
4461 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4462 BITS4(N,Q,M,0), regM);
4463 break;
4464 case ARMneon_VCGEF:
4465 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4466 BITS4(N,Q,M,0), regM);
4467 break;
4468 case ARMneon_VCEQF:
4469 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4470 BITS4(N,Q,M,0), regM);
4471 break;
4472 case ARMneon_VRSQRTS:
4473 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4474 BITS4(N,Q,M,1), regM);
4475 break;
4476 default:
4477 goto bad;
4478 }
4479 *p++ = insn;
4480 goto done;
4481 }
4482 case ARMin_NShift: {
4483 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4484 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4485 ? (qregNo(i->ARMin.NShift.dst) << 1)
4486 : dregNo(i->ARMin.NShift.dst);
4487 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4488 ? (qregNo(i->ARMin.NShift.argL) << 1)
4489 : dregNo(i->ARMin.NShift.argL);
4490 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4491 ? (qregNo(i->ARMin.NShift.argR) << 1)
4492 : dregNo(i->ARMin.NShift.argR);
4493 UInt sz1 = i->ARMin.NShift.size >> 1;
4494 UInt sz2 = i->ARMin.NShift.size & 1;
4495 UInt D = regD >> 4;
4496 UInt N = regN >> 4;
4497 UInt M = regM >> 4;
4498 UInt insn;
4499 regD &= 0xF;
4500 regM &= 0xF;
4501 regN &= 0xF;
4502 switch (i->ARMin.NShift.op) {
4503 case ARMneon_VSHL:
4504 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4505 X0100, BITS4(N,Q,M,0), regM);
4506 break;
4507 case ARMneon_VSAL:
4508 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4509 X0100, BITS4(N,Q,M,0), regM);
4510 break;
4511 case ARMneon_VQSHL:
4512 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4513 X0100, BITS4(N,Q,M,1), regM);
4514 break;
4515 case ARMneon_VQSAL:
4516 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4517 X0100, BITS4(N,Q,M,1), regM);
4518 break;
4519 default:
4520 goto bad;
4521 }
4522 *p++ = insn;
4523 goto done;
4524 }
4525 case ARMin_NShl64: {
4526 HReg regDreg = i->ARMin.NShl64.dst;
4527 HReg regMreg = i->ARMin.NShl64.src;
4528 UInt amt = i->ARMin.NShl64.amt;
4529 vassert(amt >= 1 && amt <= 63);
4530 vassert(hregClass(regDreg) == HRcFlt64);
4531 vassert(hregClass(regMreg) == HRcFlt64);
4532 UInt regD = dregNo(regDreg);
4533 UInt regM = dregNo(regMreg);
4534 UInt D = (regD >> 4) & 1;
4535 UInt Vd = regD & 0xF;
4536 UInt L = 1;
4537 UInt Q = 0; /* always 64-bit */
4538 UInt M = (regM >> 4) & 1;
4539 UInt Vm = regM & 0xF;
4540 UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4541 amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4542 *p++ = insn;
4543 goto done;
4544 }
4545 case ARMin_NeonImm: {
4546 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4547 UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
4548 dregNo(i->ARMin.NeonImm.dst);
4549 UInt D = regD >> 4;
4550 UInt imm = i->ARMin.NeonImm.imm->imm8;
4551 UInt tp = i->ARMin.NeonImm.imm->type;
4552 UInt j = imm >> 7;
4553 UInt imm3 = (imm >> 4) & 0x7;
4554 UInt imm4 = imm & 0xF;
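      /* imm8 is split a:bcd:efgh and scattered across the insn word
         built below, per the usual AdvSIMD modified-immediate
         layout. */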
4555 UInt cmode, op;
4556 UInt insn;
4557 regD &= 0xF;
4558 if (tp == 9)
4559 op = 1;
4560 else
4561 op = 0;
4562 switch (tp) {
4563 case 0:
4564 case 1:
4565 case 2:
4566 case 3:
4567 case 4:
4568 case 5:
4569 cmode = tp << 1;
4570 break;
4571 case 9:
4572 case 6:
4573 cmode = 14;
4574 break;
4575 case 7:
4576 cmode = 12;
4577 break;
4578 case 8:
4579 cmode = 13;
4580 break;
4581 case 10:
4582 cmode = 15;
4583 break;
4584 default:
4585 vpanic("ARMin_NeonImm");
4586
4587 }
4588 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4589 cmode, BITS4(0,Q,op,1), imm4);
4590 *p++ = insn;
4591 goto done;
4592 }
4593 case ARMin_NCMovQ: {
4594 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4595 UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
4596 UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
4597 UInt vM = qM & 0xF;
4598 UInt vD = qD & 0xF;
4599 UInt M = (qM >> 4) & 1;
4600 UInt D = (qD >> 4) & 1;
4601 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4602 /* b!cc here+8: !cc A00 0000 */
4603 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4604 *p++ = insn;
4605 /* vmov qD, qM */
4606 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4607 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4608 *p++ = insn;
4609 goto done;
4610 }
4611 case ARMin_Add32: {
4612 UInt regD = iregNo(i->ARMin.Add32.rD);
4613 UInt regN = iregNo(i->ARMin.Add32.rN);
4614 UInt imm32 = i->ARMin.Add32.imm32;
4615 vassert(regD != regN);
4616 /* MOV regD, imm32 */
4617 p = imm32_to_iregNo((UInt *)p, regD, imm32);
4618 /* ADD regD, regN, regD */
4619 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4620 *p++ = insn;
4621 goto done;
4622 }
4623
4624 case ARMin_EvCheck: {
4625 /* We generate:
4626 ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4627 subs r12, r12, #1 (A1)
4628 str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4629 bpl nofail
4630 ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
4631 bx r12
4632 nofail:
4633 */
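         /* That's six words; the crosscheck below insists the total
            equals evCheckSzB_ARM(), i.e. 24 bytes. */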
4634 UInt* p0 = p;
4635 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4636 i->ARMin.EvCheck.amCounter);
4637 *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4638 p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4639 i->ARMin.EvCheck.amCounter);
4640 *p++ = 0x5A000001; /* bpl nofail */
4641 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4642 i->ARMin.EvCheck.amFailAddr);
4643 *p++ = 0xE12FFF1C; /* bx r12 */
4644 /* nofail: */
4645
4646 /* Crosscheck */
4647 vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4648 goto done;
4649 }
4650
4651 case ARMin_ProfInc: {
4652 /* We generate:
4653 (ctrP is unknown now, so use 0x65556555 in the
4654             expectation that a later call to LibVEX_PatchProfInc
4655 will be used to fill in the immediate fields once the
4656 right value is known.)
4657 movw r12, lo16(0x65556555)
4658            movt r12, hi16(0x65556555)
4659 ldr r11, [r12]
4660 adds r11, r11, #1
4661 str r11, [r12]
4662 ldr r11, [r12+4]
4663 adc r11, r11, #0
4664 str r11, [r12+4]
4665 */
4666 p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
4667 *p++ = 0xE59CB000;
4668 *p++ = 0xE29BB001;
4669 *p++ = 0xE58CB000;
4670 *p++ = 0xE59CB004;
4671 *p++ = 0xE2ABB000;
4672 *p++ = 0xE58CB004;
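      /* (patchProfInc_ARM below expects to find exactly these six
         words following the movw/movt pair.) */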
4673 /* Tell the caller .. */
4674 vassert(!(*is_profInc));
4675 *is_profInc = True;
4676 goto done;
4677 }
4678
4679 /* ... */
4680 default:
4681 goto bad;
4682 }
4683
4684 bad:
4685 ppARMInstr(i);
4686 vpanic("emit_ARMInstr");
4687 /*NOTREACHED*/
4688
4689 done:
4690 vassert(((UChar*)p) - &buf[0] <= 32);
4691 return ((UChar*)p) - &buf[0];
4692 }
4693
4694
4695 /* How big is an event check? See case for ARMin_EvCheck in
4696 emit_ARMInstr just above. That crosschecks what this returns, so
4697 we can tell if we're inconsistent. */
4698 Int evCheckSzB_ARM ( void )
4699 {
4700 return 24;
4701 }
4702
4703
4704 /* NB: what goes on here has to be very closely coordinated with the
4705 emitInstr case for XDirect, above. */
4706 VexInvalRange chainXDirect_ARM ( void* place_to_chain,
4707 void* disp_cp_chain_me_EXPECTED,
4708 void* place_to_jump_to )
4709 {
4710 /* What we're expecting to see is:
4711        movw r12, lo16(disp_cp_chain_me_EXPECTED)
4712        movt r12, hi16(disp_cp_chain_me_EXPECTED)
4713 blx r12
4714 viz
4715 <8 bytes generated by imm32_to_iregNo_EXACTLY2>
4716 E1 2F FF 3C
4717 */
4718 UInt* p = (UInt*)place_to_chain;
4719 vassert(0 == (3 & (HWord)p));
4720 vassert(is_imm32_to_iregNo_EXACTLY2(
4721 p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
4722 vassert(p[2] == 0xE12FFF3C);
4723 /* And what we want to change it to is either:
4724 (general case)
4725 movw r12, lo16(place_to_jump_to)
4726 movt r12, hi16(place_to_jump_to)
4727 bx r12
4728 viz
4729 <8 bytes generated by imm32_to_iregNo_EXACTLY2>
4730 E1 2F FF 1C
4731 ---OR---
4732 in the case where the displacement falls within 26 bits
4733 b disp24; undef; undef
4734 viz
4735 EA <3 bytes == disp24>
4736 FF 00 00 00
4737 FF 00 00 00
4738
4739 In both cases the replacement has the same length as the original.
4740 To remain sane & verifiable,
4741 (1) limit the displacement for the short form to
4742 (say) +/- 30 million, so as to avoid wraparound
4743 off-by-ones
4744 (2) even if the short form is applicable, once every (say)
4745 1024 times use the long form anyway, so as to maintain
4746 verifiability
4747 */
4748
4749 /* This is the delta we need to put into a B insn. It's relative
4750 to the start of the next-but-one insn, hence the -8. */
4751 Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)8;
4752 Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4753 vassert(0 == (delta & (Long)3));
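   /* Recall that a B at address A with imm24 d transfers to
      A + 8 + 4*d; delta already accounts for the +8, so encoding it
      (below) is just a shift right by 2. */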
4754
4755 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4756 if (shortOK) {
4757 shortCTR++; // thread safety bleh
4758 if (0 == (shortCTR & 0x3FF)) {
4759 shortOK = False;
4760 if (0)
4761 vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4762 "using long form\n", shortCTR);
4763 }
4764 }
4765
4766 /* And make the modifications. */
4767 if (shortOK) {
4768 Int simm24 = (Int)(delta >> 2);
4769 vassert(simm24 == ((simm24 << 8) >> 8));
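      /* The shift pair sign-extends simm24 from 24 bits; equality
         means the displacement really does fit in imm24. */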
4770 p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4771 p[1] = 0xFF000000;
4772 p[2] = 0xFF000000;
4773 } else {
4774 (void)imm32_to_iregNo_EXACTLY2(
4775 p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
4776 p[2] = 0xE12FFF1C;
4777 }
4778
4779 VexInvalRange vir = {(HWord)p, 12};
4780 return vir;
4781 }
4782
4783
4784 /* NB: what goes on here has to be very closely coordinated with the
4785 emitInstr case for XDirect, above. */
4786 VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
4787 void* place_to_jump_to_EXPECTED,
4788 void* disp_cp_chain_me )
4789 {
4790 /* What we're expecting to see is:
4791 (general case)
4792 movw r12, lo16(place_to_jump_to_EXPECTED)
4793        movt r12, hi16(place_to_jump_to_EXPECTED)
4794 bx r12
4795 viz
4796 <8 bytes generated by imm32_to_iregNo_EXACTLY2>
4797 E1 2F FF 1C
4798 ---OR---
4799 in the case where the displacement falls within 26 bits
4800 b disp24; undef; undef
4801 viz
4802 EA <3 bytes == disp24>
4803 FF 00 00 00
4804 FF 00 00 00
4805 */
4806 UInt* p = (UInt*)place_to_unchain;
4807 vassert(0 == (3 & (HWord)p));
4808
4809 Bool valid = False;
4810 if (is_imm32_to_iregNo_EXACTLY2(
4811 p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
4812 && p[2] == 0xE12FFF1C) {
4813 valid = True; /* it's the long form */
4814 if (0)
4815 vex_printf("QQQ unchainXDirect_ARM: found long form\n");
4816 } else
4817 if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
4818 /* It's the short form. Check the displacement is right. */
4819 Int simm24 = p[0] & 0x00FFFFFF;
4820 simm24 <<= 8; simm24 >>= 8;
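      /* the shift pair sign-extends the 24-bit displacement */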
4821 if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
4822 valid = True;
4823 if (0)
4824 vex_printf("QQQ unchainXDirect_ARM: found short form\n");
4825 }
4826 }
4827 vassert(valid);
4828
4829 /* And what we want to change it to is:
4830 movw r12, lo16(disp_cp_chain_me)
4831 movt r12, hi16(disp_cp_chain_me)
4832 blx r12
4833 viz
4834 <8 bytes generated by imm32_to_iregNo_EXACTLY2>
4835 E1 2F FF 3C
4836 */
4837 (void)imm32_to_iregNo_EXACTLY2(
4838 p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
4839 p[2] = 0xE12FFF3C;
4840 VexInvalRange vir = {(HWord)p, 12};
4841 return vir;
4842 }
4843
4844
4845 /* Patch the counter address into a profile inc point, as previously
4846 created by the ARMin_ProfInc case for emit_ARMInstr. */
4847 VexInvalRange patchProfInc_ARM ( void* place_to_patch,
4848 ULong* location_of_counter )
4849 {
4850 vassert(sizeof(ULong*) == 4);
4851 UInt* p = (UInt*)place_to_patch;
4852 vassert(0 == (3 & (HWord)p));
4853 vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
4854 vassert(p[2] == 0xE59CB000);
4855 vassert(p[3] == 0xE29BB001);
4856 vassert(p[4] == 0xE58CB000);
4857 vassert(p[5] == 0xE59CB004);
4858 vassert(p[6] == 0xE2ABB000);
4859 vassert(p[7] == 0xE58CB004);
4860 imm32_to_iregNo_EXACTLY2(p, /*r*/12,
4861 (UInt)Ptr_to_ULong(location_of_counter));
4862 VexInvalRange vir = {(HWord)p, 8};
4863 return vir;
4864 }
4865
4866
4867 #undef BITS4
4868 #undef X0000
4869 #undef X0001
4870 #undef X0010
4871 #undef X0011
4872 #undef X0100
4873 #undef X0101
4874 #undef X0110
4875 #undef X0111
4876 #undef X1000
4877 #undef X1001
4878 #undef X1010
4879 #undef X1011
4880 #undef X1100
4881 #undef X1101
4882 #undef X1110
4883 #undef X1111
4884 #undef XXXXX___
4885 #undef XXXXXX__
4886 #undef XXX___XX
4887 #undef XXXXX__X
4888 #undef XXXXXXXX
4889 #undef XX______
4890
4891 /*---------------------------------------------------------------*/
4892 /*--- end host_arm_defs.c ---*/
4893 /*---------------------------------------------------------------*/
4894