1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
17
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
32
33 The GNU General Public License is contained in the file COPYING.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_arm_defs.h"
43
44 UInt arm_hwcaps = 0;
45
46
47 /* --------- Registers. --------- */
48
49 const RRegUniverse* getRRegUniverse_ARM ( void )
50 {
51 /* The real-register universe is a big constant, so we just want to
52 initialise it once. */
53 static RRegUniverse rRegUniverse_ARM;
54 static Bool rRegUniverse_ARM_initted = False;
55
56 /* Handy shorthand, nothing more */
57 RRegUniverse* ru = &rRegUniverse_ARM;
58
59 /* This isn't thread-safe. Sigh. */
60 if (LIKELY(rRegUniverse_ARM_initted))
61 return ru;
62
63 RRegUniverse__init(ru);
64
65 /* Add the registers. The initial segment of this array must be
66 those available for allocation by reg-alloc, and those that
67 follow are not available for allocation. */
68
69 /* Callee saves ones are listed first, since we prefer them
70 if they're available. */
71 ru->regs[ru->size++] = hregARM_R4();
72 ru->regs[ru->size++] = hregARM_R5();
73 ru->regs[ru->size++] = hregARM_R6();
74 ru->regs[ru->size++] = hregARM_R7();
75 ru->regs[ru->size++] = hregARM_R10();
76 ru->regs[ru->size++] = hregARM_R11();
77 /* Otherwise we'll have to slum it out with caller-saves ones. */
78 ru->regs[ru->size++] = hregARM_R0();
79 ru->regs[ru->size++] = hregARM_R1();
80 ru->regs[ru->size++] = hregARM_R2();
81 ru->regs[ru->size++] = hregARM_R3();
82 ru->regs[ru->size++] = hregARM_R9();
83 /* FP registers. Note: these are all callee-save. Yay! Hence we
84 don't need to mention them as trashed in getHRegUsage for
85 ARMInstr_Call. */
86 ru->regs[ru->size++] = hregARM_D8();
87 ru->regs[ru->size++] = hregARM_D9();
88 ru->regs[ru->size++] = hregARM_D10();
89 ru->regs[ru->size++] = hregARM_D11();
90 ru->regs[ru->size++] = hregARM_D12();
91 ru->regs[ru->size++] = hregARM_S26();
92 ru->regs[ru->size++] = hregARM_S27();
93 ru->regs[ru->size++] = hregARM_S28();
94 ru->regs[ru->size++] = hregARM_S29();
95 ru->regs[ru->size++] = hregARM_S30();
96 ru->regs[ru->size++] = hregARM_Q8();
97 ru->regs[ru->size++] = hregARM_Q9();
98 ru->regs[ru->size++] = hregARM_Q10();
99 ru->regs[ru->size++] = hregARM_Q11();
100 ru->regs[ru->size++] = hregARM_Q12();
101 ru->allocable = ru->size;
102
103 /* And other regs, not available to the allocator. */
104
105 // unavail: r8 as GSP
106 // r12 is used as a spill/reload temporary
107 // r13 as SP
108 // r14 as LR
109 // r15 as PC
110 //
111 // All in all, we have 11 allocatable integer registers:
112 // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
113 // and r12 dedicated as a spill temporary.
114 // 13 14 and 15 are not under the allocator's control.
115 //
116 // Hence for the allocatable registers we have:
117 //
118 // callee-saved: 4 5 6 7 (8) 9 10 11
119 // caller-saved: 0 1 2 3
120 // Note 9 is ambiguous: the base EABI does not give an e/r-saved
121 // designation for it, but the Linux instantiation of the ABI
122 // specifies it as callee-saved.
123 //
124 // If the set of available registers changes or if the e/r status
125 // changes, be sure to re-check/sync the definition of
126 // getHRegUsage for ARMInstr_Call too.
127 ru->regs[ru->size++] = hregARM_R8();
128 ru->regs[ru->size++] = hregARM_R12();
129 ru->regs[ru->size++] = hregARM_R13();
130 ru->regs[ru->size++] = hregARM_R14();
131 ru->regs[ru->size++] = hregARM_R15();
132 ru->regs[ru->size++] = hregARM_Q13();
133 ru->regs[ru->size++] = hregARM_Q14();
134 ru->regs[ru->size++] = hregARM_Q15();
135
136 rRegUniverse_ARM_initted = True;
137
138 RRegUniverse__check_is_sane(ru);
139 return ru;
140 }
141
142
143 void ppHRegARM ( HReg reg ) {
144 Int r;
145 /* Be generic for all virtual regs. */
146 if (hregIsVirtual(reg)) {
147 ppHReg(reg);
148 return;
149 }
150 /* But specific for real regs. */
151 switch (hregClass(reg)) {
152 case HRcInt32:
153 r = hregEncoding(reg);
154 vassert(r >= 0 && r < 16);
155 vex_printf("r%d", r);
156 return;
157 case HRcFlt64:
158 r = hregEncoding(reg);
159 vassert(r >= 0 && r < 32);
160 vex_printf("d%d", r);
161 return;
162 case HRcFlt32:
163 r = hregEncoding(reg);
164 vassert(r >= 0 && r < 32);
165 vex_printf("s%d", r);
166 return;
167 case HRcVec128:
168 r = hregEncoding(reg);
169 vassert(r >= 0 && r < 16);
170 vex_printf("q%d", r);
171 return;
172 default:
173 vpanic("ppHRegARM");
174 }
175 }
176
177
178 /* --------- Condition codes, ARM encoding. --------- */
179
180 const HChar* showARMCondCode ( ARMCondCode cond ) {
181 switch (cond) {
182 case ARMcc_EQ: return "eq";
183 case ARMcc_NE: return "ne";
184 case ARMcc_HS: return "hs";
185 case ARMcc_LO: return "lo";
186 case ARMcc_MI: return "mi";
187 case ARMcc_PL: return "pl";
188 case ARMcc_VS: return "vs";
189 case ARMcc_VC: return "vc";
190 case ARMcc_HI: return "hi";
191 case ARMcc_LS: return "ls";
192 case ARMcc_GE: return "ge";
193 case ARMcc_LT: return "lt";
194 case ARMcc_GT: return "gt";
195 case ARMcc_LE: return "le";
196 case ARMcc_AL: return "al"; // default
197 case ARMcc_NV: return "nv";
198 default: vpanic("showARMCondCode");
199 }
200 }
201
202
203 /* --------- Mem AModes: Addressing Mode 1 --------- */
204
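/* An AMode1 is either reg plus a signed immediate in -4095 .. 4095, or
   reg plus (reg << shift) with shift in 0 .. 3.  It is the address form
   taken by the word and unsigned-byte load/stores below
   (ARMin_LdSt32, ARMin_LdSt8U). */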
205 ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
206 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
207 am->tag = ARMam1_RI;
208 am->ARMam1.RI.reg = reg;
209 am->ARMam1.RI.simm13 = simm13;
210 vassert(-4095 <= simm13 && simm13 <= 4095);
211 return am;
212 }
213 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
214 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
215 am->tag = ARMam1_RRS;
216 am->ARMam1.RRS.base = base;
217 am->ARMam1.RRS.index = index;
218 am->ARMam1.RRS.shift = shift;
219 vassert(0 <= shift && shift <= 3);
220 return am;
221 }
222
223 void ppARMAMode1 ( ARMAMode1* am ) {
224 switch (am->tag) {
225 case ARMam1_RI:
226 vex_printf("%d(", am->ARMam1.RI.simm13);
227 ppHRegARM(am->ARMam1.RI.reg);
228 vex_printf(")");
229 break;
230 case ARMam1_RRS:
231 vex_printf("(");
232 ppHRegARM(am->ARMam1.RRS.base);
233 vex_printf(",");
234 ppHRegARM(am->ARMam1.RRS.index);
235 vex_printf(",%u)", am->ARMam1.RRS.shift);
236 break;
237 default:
238 vassert(0);
239 }
240 }
241
242 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
243 switch (am->tag) {
244 case ARMam1_RI:
245 addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
246 return;
247 case ARMam1_RRS:
248 // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
249 // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
250 // return;
251 default:
252 vpanic("addRegUsage_ARMAmode1");
253 }
254 }
255
256 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
257 switch (am->tag) {
258 case ARMam1_RI:
259 am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
260 return;
261 case ARMam1_RRS:
262 //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
263 //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
264 //return;
265 default:
266 vpanic("mapRegs_ARMAmode1");
267 }
268 }
269
270
271 /* --------- Mem AModes: Addressing Mode 2 --------- */
272
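/* An AMode2 is either reg plus a signed immediate in -255 .. 255, or
   reg plus reg.  It is the address form used by the halfword and
   signed-byte load/stores below (ARMin_LdSt16, ARMin_Ld8S). */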
273 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
274 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
275 am->tag = ARMam2_RI;
276 am->ARMam2.RI.reg = reg;
277 am->ARMam2.RI.simm9 = simm9;
278 vassert(-255 <= simm9 && simm9 <= 255);
279 return am;
280 }
281 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
282 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
283 am->tag = ARMam2_RR;
284 am->ARMam2.RR.base = base;
285 am->ARMam2.RR.index = index;
286 return am;
287 }
288
289 void ppARMAMode2 ( ARMAMode2* am ) {
290 switch (am->tag) {
291 case ARMam2_RI:
292 vex_printf("%d(", am->ARMam2.RI.simm9);
293 ppHRegARM(am->ARMam2.RI.reg);
294 vex_printf(")");
295 break;
296 case ARMam2_RR:
297 vex_printf("(");
298 ppHRegARM(am->ARMam2.RR.base);
299 vex_printf(",");
300 ppHRegARM(am->ARMam2.RR.index);
301 vex_printf(")");
302 break;
303 default:
304 vassert(0);
305 }
306 }
307
308 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
309 switch (am->tag) {
310 case ARMam2_RI:
311 addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
312 return;
313 case ARMam2_RR:
314 // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
315 // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
316 // return;
317 default:
318 vpanic("addRegUsage_ARMAmode2");
319 }
320 }
321
322 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
323 switch (am->tag) {
324 case ARMam2_RI:
325 am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
326 return;
327 case ARMam2_RR:
328 //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
329 //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
330 //return;
331 default:
332 vpanic("mapRegs_ARMAmode2");
333 }
334 }
335
336
337 /* --------- Mem AModes: Addressing Mode VFP --------- */
338
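/* A VFP amode is reg plus a signed immediate which must be a multiple of
   4 in the range -1020 .. 1020 -- i.e. what the scaled 8-bit offset of
   FLDD/FSTD (VLDR/VSTR) can represent, as the asserts below enforce. */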
339 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
340 ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
341 vassert(simm11 >= -1020 && simm11 <= 1020);
342 vassert(0 == (simm11 & 3));
343 am->reg = reg;
344 am->simm11 = simm11;
345 return am;
346 }
347
348 void ppARMAModeV ( ARMAModeV* am ) {
349 vex_printf("%d(", am->simm11);
350 ppHRegARM(am->reg);
351 vex_printf(")");
352 }
353
354 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
355 addHRegUse(u, HRmRead, am->reg);
356 }
357
358 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
359 am->reg = lookupHRegRemap(m, am->reg);
360 }
361
362
363 /* --------- Mem AModes: Addressing Mode Neon ------- */
364
365 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
366 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
367 am->tag = ARMamN_RR;
368 am->ARMamN.RR.rN = rN;
369 am->ARMamN.RR.rM = rM;
370 return am;
371 }
372
373 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
374 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
375 am->tag = ARMamN_R;
376 am->ARMamN.R.rN = rN;
377 return am;
378 }
379
380 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
381 if (am->tag == ARMamN_R) {
382 addHRegUse(u, HRmRead, am->ARMamN.R.rN);
383 } else {
384 addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
385 addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
386 }
387 }
388
389 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
390 if (am->tag == ARMamN_R) {
391 am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
392 } else {
393 am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
394 am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
395 }
396 }
397
398 void ppARMAModeN ( ARMAModeN* am ) {
399 vex_printf("[");
400 if (am->tag == ARMamN_R) {
401 ppHRegARM(am->ARMamN.R.rN);
402 } else {
403 ppHRegARM(am->ARMamN.RR.rN);
404 }
405 vex_printf("]");
406 if (am->tag == ARMamN_RR) {
407 vex_printf(", ");
408 ppHRegARM(am->ARMamN.RR.rM);
409 }
410 }
411
412
413 /* --------- Reg or imm-8x4 operands --------- */
414
415 static UInt ROR32 ( UInt x, UInt sh ) {
416 vassert(sh >= 0 && sh < 32);
417 if (sh == 0)
418 return x;
419 else
420 return (x << (32-sh)) | (x >> sh);
421 }
422
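/* The I84 form denotes the standard ARM "rotated immediate": an 8-bit
   value rotated right by twice the 4-bit rotate field.  A worked example:
   ARMRI84_I84(0xFF, 15) stands for ROR32(0xFF, 30) == 0x3FC, whereas a
   constant such as 0x101 has no I84 representation (its set bits never
   fit in one byte under an even rotation) and must be materialised some
   other way, e.g. via ARMInstr_Imm32. */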
423 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
424 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
425 ri84->tag = ARMri84_I84;
426 ri84->ARMri84.I84.imm8 = imm8;
427 ri84->ARMri84.I84.imm4 = imm4;
428 vassert(imm8 >= 0 && imm8 <= 255);
429 vassert(imm4 >= 0 && imm4 <= 15);
430 return ri84;
431 }
432 ARMRI84* ARMRI84_R ( HReg reg ) {
433 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
434 ri84->tag = ARMri84_R;
435 ri84->ARMri84.R.reg = reg;
436 return ri84;
437 }
438
439 void ppARMRI84 ( ARMRI84* ri84 ) {
440 switch (ri84->tag) {
441 case ARMri84_I84:
442 vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
443 2 * ri84->ARMri84.I84.imm4));
444 break;
445 case ARMri84_R:
446 ppHRegARM(ri84->ARMri84.R.reg);
447 break;
448 default:
449 vassert(0);
450 }
451 }
452
453 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
454 switch (ri84->tag) {
455 case ARMri84_I84:
456 return;
457 case ARMri84_R:
458 addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
459 return;
460 default:
461 vpanic("addRegUsage_ARMRI84");
462 }
463 }
464
465 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
466 switch (ri84->tag) {
467 case ARMri84_I84:
468 return;
469 case ARMri84_R:
470 ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
471 return;
472 default:
473 vpanic("mapRegs_ARMRI84");
474 }
475 }
476
477
478 /* --------- Reg or imm5 operands --------- */
479
480 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
481 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
482 ri5->tag = ARMri5_I5;
483 ri5->ARMri5.I5.imm5 = imm5;
484 vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
485 return ri5;
486 }
487 ARMRI5* ARMRI5_R ( HReg reg ) {
488 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
489 ri5->tag = ARMri5_R;
490 ri5->ARMri5.R.reg = reg;
491 return ri5;
492 }
493
494 void ppARMRI5 ( ARMRI5* ri5 ) {
495 switch (ri5->tag) {
496 case ARMri5_I5:
497 vex_printf("%u", ri5->ARMri5.I5.imm5);
498 break;
499 case ARMri5_R:
500 ppHRegARM(ri5->ARMri5.R.reg);
501 break;
502 default:
503 vassert(0);
504 }
505 }
506
507 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
508 switch (ri5->tag) {
509 case ARMri5_I5:
510 return;
511 case ARMri5_R:
512 addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
513 return;
514 default:
515 vpanic("addRegUsage_ARMRI5");
516 }
517 }
518
519 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
520 switch (ri5->tag) {
521 case ARMri5_I5:
522 return;
523 case ARMri5_R:
524 ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
525 return;
526 default:
527 vpanic("mapRegs_ARMRI5");
528 }
529 }
530
531 /* -------- Neon Immediate operand --------- */
532
533 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
534 ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
535 i->type = type;
536 i->imm8 = imm8;
537 return i;
538 }
539
540 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
541 int i, j;
542 ULong y, x = imm->imm8;
543 switch (imm->type) {
544 case 3:
545 x = x << 8; /* fallthrough */
546 case 2:
547 x = x << 8; /* fallthrough */
548 case 1:
549 x = x << 8; /* fallthrough */
550 case 0:
551 return (x << 32) | x;
552 case 5:
553 case 6:
554 if (imm->type == 5)
555 x = x << 8;
556 else
557 x = (x << 8) | x;
558 /* fallthrough */
559 case 4:
560 x = (x << 16) | x;
561 return (x << 32) | x;
562 case 8:
563 x = (x << 8) | 0xFF;
564 /* fallthrough */
565 case 7:
566 x = (x << 8) | 0xFF;
567 return (x << 32) | x;
568 case 9:
569 x = 0;
570 for (i = 7; i >= 0; i--) {
571 y = ((ULong)imm->imm8 >> i) & 1;
572 for (j = 0; j < 8; j++) {
573 x = (x << 1) | y;
574 }
575 }
576 return x;
577 case 10:
578 x |= (x & 0x80) << 5;
579 x |= (~x & 0x40) << 5;
580 x &= 0x187F; /* 0001 1000 0111 1111 */
581 x |= (x & 0x40) << 4;
582 x |= (x & 0x40) << 3;
583 x |= (x & 0x40) << 2;
584 x |= (x & 0x40) << 1;
585 x = x << 19;
586 x = (x << 32) | x;
587 return x;
588 default:
589 vpanic("ARMNImm_to_Imm64");
590 }
591 }
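/* Two worked examples of the expansion above: type 0 with imm8 = 0x55
   yields 0x0000005500000055 (the byte placed in the low byte of each
   32-bit half), and type 9 with imm8 = 0xA5 yields 0xFF00FF0000FF00FF
   (each bit of imm8 widened to a whole byte). */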
592
593 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
594 ARMNImm tmp;
595 if ((x & 0xFFFFFFFF) == (x >> 32)) {
596 if ((x & 0xFFFFFF00) == 0)
597 return ARMNImm_TI(0, x & 0xFF);
598 if ((x & 0xFFFF00FF) == 0)
599 return ARMNImm_TI(1, (x >> 8) & 0xFF);
600 if ((x & 0xFF00FFFF) == 0)
601 return ARMNImm_TI(2, (x >> 16) & 0xFF);
602 if ((x & 0x00FFFFFF) == 0)
603 return ARMNImm_TI(3, (x >> 24) & 0xFF);
604 if ((x & 0xFFFF00FF) == 0xFF)
605 return ARMNImm_TI(7, (x >> 8) & 0xFF);
606 if ((x & 0xFF00FFFF) == 0xFFFF)
607 return ARMNImm_TI(8, (x >> 16) & 0xFF);
608 if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
609 if ((x & 0xFF00) == 0)
610 return ARMNImm_TI(4, x & 0xFF);
611 if ((x & 0x00FF) == 0)
612 return ARMNImm_TI(5, (x >> 8) & 0xFF);
613 if ((x & 0xFF) == ((x >> 8) & 0xFF))
614 return ARMNImm_TI(6, x & 0xFF);
615 }
616 if ((x & 0x7FFFF) == 0) {
617 tmp.type = 10;
618 tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
619 if (ARMNImm_to_Imm64(&tmp) == x)
620 return ARMNImm_TI(tmp.type, tmp.imm8);
621 }
622 } else {
623 /* This can only be type 9. */
624 tmp.imm8 = (((x >> 56) & 1) << 7)
625 | (((x >> 48) & 1) << 6)
626 | (((x >> 40) & 1) << 5)
627 | (((x >> 32) & 1) << 4)
628 | (((x >> 24) & 1) << 3)
629 | (((x >> 16) & 1) << 2)
630 | (((x >> 8) & 1) << 1)
631 | (((x >> 0) & 1) << 0);
632 tmp.type = 9;
633 if (ARMNImm_to_Imm64 (&tmp) == x)
634 return ARMNImm_TI(tmp.type, tmp.imm8);
635 }
636 return NULL;
637 }
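/* Round-trip example: Imm64_to_ARMNImm(0x00FF00FF00FF00FFULL) selects
   type 4 with imm8 = 0xFF, and ARMNImm_to_Imm64 maps that pair back to
   the same 64-bit value.  Patterns with no valid encoding (for instance
   0x0123456789ABCDEF) make this function return NULL. */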
638
639 void ppARMNImm (ARMNImm* i) {
640 ULong x = ARMNImm_to_Imm64(i);
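   /* A Neon immediate fills both 64-bit halves of the 128-bit register
      with the same pattern, hence the 64-bit value is printed twice. */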
641 vex_printf("0x%llX%llX", x, x);
642 }
643
644 /* -- Register or scalar operand --- */
645
646 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
647 {
648 ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
649 p->tag = tag;
650 p->reg = reg;
651 p->index = index;
652 return p;
653 }
654
655 void ppARMNRS(ARMNRS *p)
656 {
657 ppHRegARM(p->reg);
658 if (p->tag == ARMNRS_Scalar) {
659 vex_printf("[%u]", p->index);
660 }
661 }
662
663 /* --------- Instructions. --------- */
664
665 const HChar* showARMAluOp ( ARMAluOp op ) {
666 switch (op) {
667 case ARMalu_ADD: return "add";
668 case ARMalu_ADDS: return "adds";
669 case ARMalu_ADC: return "adc";
670 case ARMalu_SUB: return "sub";
671 case ARMalu_SUBS: return "subs";
672 case ARMalu_SBC: return "sbc";
673 case ARMalu_AND: return "and";
674 case ARMalu_BIC: return "bic";
675 case ARMalu_OR: return "orr";
676 case ARMalu_XOR: return "xor";
677 default: vpanic("showARMAluOp");
678 }
679 }
680
681 const HChar* showARMShiftOp ( ARMShiftOp op ) {
682 switch (op) {
683 case ARMsh_SHL: return "shl";
684 case ARMsh_SHR: return "shr";
685 case ARMsh_SAR: return "sar";
686 default: vpanic("showARMShiftOp");
687 }
688 }
689
690 const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
691 switch (op) {
692 case ARMun_NEG: return "neg";
693 case ARMun_NOT: return "not";
694 case ARMun_CLZ: return "clz";
695 default: vpanic("showARMUnaryOp");
696 }
697 }
698
699 const HChar* showARMMulOp ( ARMMulOp op ) {
700 switch (op) {
701 case ARMmul_PLAIN: return "mul";
702 case ARMmul_ZX: return "umull";
703 case ARMmul_SX: return "smull";
704 default: vpanic("showARMMulOp");
705 }
706 }
707
708 const HChar* showARMVfpOp ( ARMVfpOp op ) {
709 switch (op) {
710 case ARMvfp_ADD: return "add";
711 case ARMvfp_SUB: return "sub";
712 case ARMvfp_MUL: return "mul";
713 case ARMvfp_DIV: return "div";
714 default: vpanic("showARMVfpOp");
715 }
716 }
717
718 const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
719 switch (op) {
720 case ARMvfpu_COPY: return "cpy";
721 case ARMvfpu_NEG: return "neg";
722 case ARMvfpu_ABS: return "abs";
723 case ARMvfpu_SQRT: return "sqrt";
724 default: vpanic("showARMVfpUnaryOp");
725 }
726 }
727
728 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
729 switch (op) {
730 case ARMneon_VAND: return "vand";
731 case ARMneon_VORR: return "vorr";
732 case ARMneon_VXOR: return "veor";
733 case ARMneon_VADD: return "vadd";
734 case ARMneon_VRHADDS: return "vrhadd";
735 case ARMneon_VRHADDU: return "vrhadd";
736 case ARMneon_VADDFP: return "vadd";
737 case ARMneon_VPADDFP: return "vpadd";
738 case ARMneon_VABDFP: return "vabd";
739 case ARMneon_VSUB: return "vsub";
740 case ARMneon_VSUBFP: return "vsub";
741 case ARMneon_VMINU: return "vmin";
742 case ARMneon_VMINS: return "vmin";
743 case ARMneon_VMINF: return "vmin";
744 case ARMneon_VMAXU: return "vmax";
745 case ARMneon_VMAXS: return "vmax";
746 case ARMneon_VMAXF: return "vmax";
747 case ARMneon_VQADDU: return "vqadd";
748 case ARMneon_VQADDS: return "vqadd";
749 case ARMneon_VQSUBU: return "vqsub";
750 case ARMneon_VQSUBS: return "vqsub";
751 case ARMneon_VCGTU: return "vcgt";
752 case ARMneon_VCGTS: return "vcgt";
753 case ARMneon_VCGTF: return "vcgt";
754       case ARMneon_VCGEF: return "vcge";
755 case ARMneon_VCGEU: return "vcge";
756 case ARMneon_VCGES: return "vcge";
757 case ARMneon_VCEQ: return "vceq";
758 case ARMneon_VCEQF: return "vceq";
759 case ARMneon_VPADD: return "vpadd";
760 case ARMneon_VPMINU: return "vpmin";
761 case ARMneon_VPMINS: return "vpmin";
762 case ARMneon_VPMINF: return "vpmin";
763 case ARMneon_VPMAXU: return "vpmax";
764 case ARMneon_VPMAXS: return "vpmax";
765 case ARMneon_VPMAXF: return "vpmax";
766 case ARMneon_VEXT: return "vext";
767 case ARMneon_VMUL: return "vmuli";
768 case ARMneon_VMULLU: return "vmull";
769 case ARMneon_VMULLS: return "vmull";
770 case ARMneon_VMULP: return "vmul";
771 case ARMneon_VMULFP: return "vmul";
772       case ARMneon_VMULLP: return "vmull";
773 case ARMneon_VQDMULH: return "vqdmulh";
774 case ARMneon_VQRDMULH: return "vqrdmulh";
775 case ARMneon_VQDMULL: return "vqdmull";
776 case ARMneon_VTBL: return "vtbl";
777 case ARMneon_VRECPS: return "vrecps";
778       case ARMneon_VRSQRTS: return "vrsqrts";
779 case ARMneon_INVALID: return "??invalid??";
780 /* ... */
781 default: vpanic("showARMNeonBinOp");
782 }
783 }
784
785 const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
786 switch (op) {
787 case ARMneon_VAND:
788 case ARMneon_VORR:
789 case ARMneon_VXOR:
790 return "";
791 case ARMneon_VADD:
792 case ARMneon_VSUB:
793 case ARMneon_VEXT:
794 case ARMneon_VMUL:
795 case ARMneon_VPADD:
796 case ARMneon_VTBL:
797 case ARMneon_VCEQ:
798 return ".i";
799 case ARMneon_VRHADDU:
800 case ARMneon_VMINU:
801 case ARMneon_VMAXU:
802 case ARMneon_VQADDU:
803 case ARMneon_VQSUBU:
804 case ARMneon_VCGTU:
805 case ARMneon_VCGEU:
806 case ARMneon_VMULLU:
807 case ARMneon_VPMINU:
808 case ARMneon_VPMAXU:
809 return ".u";
810 case ARMneon_VRHADDS:
811 case ARMneon_VMINS:
812 case ARMneon_VMAXS:
813 case ARMneon_VQADDS:
814 case ARMneon_VQSUBS:
815 case ARMneon_VCGTS:
816 case ARMneon_VCGES:
817 case ARMneon_VQDMULL:
818 case ARMneon_VMULLS:
819 case ARMneon_VPMINS:
820 case ARMneon_VPMAXS:
821 case ARMneon_VQDMULH:
822 case ARMneon_VQRDMULH:
823 return ".s";
824 case ARMneon_VMULP:
825 case ARMneon_VMULLP:
826 return ".p";
827 case ARMneon_VADDFP:
828 case ARMneon_VABDFP:
829 case ARMneon_VPADDFP:
830 case ARMneon_VSUBFP:
831 case ARMneon_VMULFP:
832 case ARMneon_VMINF:
833 case ARMneon_VMAXF:
834 case ARMneon_VPMINF:
835 case ARMneon_VPMAXF:
836 case ARMneon_VCGTF:
837 case ARMneon_VCGEF:
838 case ARMneon_VCEQF:
839 case ARMneon_VRECPS:
840 case ARMneon_VRSQRTS:
841 return ".f";
842 /* ... */
843 default: vpanic("showARMNeonBinOpDataType");
844 }
845 }
846
847 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
848 switch (op) {
849 case ARMneon_COPY: return "vmov";
850 case ARMneon_COPYLS: return "vmov";
851 case ARMneon_COPYLU: return "vmov";
852 case ARMneon_COPYN: return "vmov";
853 case ARMneon_COPYQNSS: return "vqmovn";
854 case ARMneon_COPYQNUS: return "vqmovun";
855 case ARMneon_COPYQNUU: return "vqmovn";
856 case ARMneon_NOT: return "vmvn";
857 case ARMneon_EQZ: return "vceq";
858 case ARMneon_CNT: return "vcnt";
859 case ARMneon_CLS: return "vcls";
860 case ARMneon_CLZ: return "vclz";
861 case ARMneon_DUP: return "vdup";
862 case ARMneon_PADDLS: return "vpaddl";
863 case ARMneon_PADDLU: return "vpaddl";
864 case ARMneon_VQSHLNSS: return "vqshl";
865 case ARMneon_VQSHLNUU: return "vqshl";
866 case ARMneon_VQSHLNUS: return "vqshlu";
867 case ARMneon_REV16: return "vrev16";
868 case ARMneon_REV32: return "vrev32";
869 case ARMneon_REV64: return "vrev64";
870 case ARMneon_VCVTFtoU: return "vcvt";
871 case ARMneon_VCVTFtoS: return "vcvt";
872 case ARMneon_VCVTUtoF: return "vcvt";
873 case ARMneon_VCVTStoF: return "vcvt";
874 case ARMneon_VCVTFtoFixedU: return "vcvt";
875 case ARMneon_VCVTFtoFixedS: return "vcvt";
876 case ARMneon_VCVTFixedUtoF: return "vcvt";
877 case ARMneon_VCVTFixedStoF: return "vcvt";
878 case ARMneon_VCVTF32toF16: return "vcvt";
879 case ARMneon_VCVTF16toF32: return "vcvt";
880 case ARMneon_VRECIP: return "vrecip";
881 case ARMneon_VRECIPF: return "vrecipf";
882 case ARMneon_VNEGF: return "vneg";
883 case ARMneon_ABS: return "vabs";
884 case ARMneon_VABSFP: return "vabsfp";
885 case ARMneon_VRSQRTEFP: return "vrsqrtefp";
886 case ARMneon_VRSQRTE: return "vrsqrte";
887 /* ... */
888 default: vpanic("showARMNeonUnOp");
889 }
890 }
891
892 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
893 switch (op) {
894 case ARMneon_COPY:
895 case ARMneon_NOT:
896 return "";
897 case ARMneon_COPYN:
898 case ARMneon_EQZ:
899 case ARMneon_CNT:
900 case ARMneon_DUP:
901 case ARMneon_REV16:
902 case ARMneon_REV32:
903 case ARMneon_REV64:
904 return ".i";
905 case ARMneon_COPYLU:
906 case ARMneon_PADDLU:
907 case ARMneon_COPYQNUU:
908 case ARMneon_VQSHLNUU:
909 case ARMneon_VRECIP:
910 case ARMneon_VRSQRTE:
911 return ".u";
912 case ARMneon_CLS:
913 case ARMneon_CLZ:
914 case ARMneon_COPYLS:
915 case ARMneon_PADDLS:
916 case ARMneon_COPYQNSS:
917 case ARMneon_COPYQNUS:
918 case ARMneon_VQSHLNSS:
919 case ARMneon_VQSHLNUS:
920 case ARMneon_ABS:
921 return ".s";
922 case ARMneon_VRECIPF:
923 case ARMneon_VNEGF:
924 case ARMneon_VABSFP:
925 case ARMneon_VRSQRTEFP:
926 return ".f";
927 case ARMneon_VCVTFtoU: return ".u32.f32";
928 case ARMneon_VCVTFtoS: return ".s32.f32";
929 case ARMneon_VCVTUtoF: return ".f32.u32";
930 case ARMneon_VCVTStoF: return ".f32.s32";
931 case ARMneon_VCVTF16toF32: return ".f32.f16";
932 case ARMneon_VCVTF32toF16: return ".f16.f32";
933 case ARMneon_VCVTFtoFixedU: return ".u32.f32";
934 case ARMneon_VCVTFtoFixedS: return ".s32.f32";
935 case ARMneon_VCVTFixedUtoF: return ".f32.u32";
936 case ARMneon_VCVTFixedStoF: return ".f32.s32";
937 /* ... */
938 default: vpanic("showARMNeonUnOpDataType");
939 }
940 }
941
942 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
943 switch (op) {
944 case ARMneon_SETELEM: return "vmov";
945 case ARMneon_GETELEMU: return "vmov";
946 case ARMneon_GETELEMS: return "vmov";
947 case ARMneon_VDUP: return "vdup";
948 /* ... */
949 default: vpanic("showARMNeonUnarySOp");
950 }
951 }
952
953 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
954 switch (op) {
955 case ARMneon_SETELEM:
956 case ARMneon_VDUP:
957 return ".i";
958 case ARMneon_GETELEMS:
959 return ".s";
960 case ARMneon_GETELEMU:
961 return ".u";
962 /* ... */
963 default: vpanic("showARMNeonUnarySOp");
964 }
965 }
966
967 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
968 switch (op) {
969 case ARMneon_VSHL: return "vshl";
970 case ARMneon_VSAL: return "vshl";
971 case ARMneon_VQSHL: return "vqshl";
972 case ARMneon_VQSAL: return "vqshl";
973 /* ... */
974 default: vpanic("showARMNeonShiftOp");
975 }
976 }
977
978 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
979 switch (op) {
980 case ARMneon_VSHL:
981 case ARMneon_VQSHL:
982 return ".u";
983 case ARMneon_VSAL:
984 case ARMneon_VQSAL:
985 return ".s";
986 /* ... */
987 default: vpanic("showARMNeonShiftOpDataType");
988 }
989 }
990
991 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
992 switch (op) {
993 case ARMneon_TRN: return "vtrn";
994 case ARMneon_ZIP: return "vzip";
995 case ARMneon_UZP: return "vuzp";
996 /* ... */
997 default: vpanic("showARMNeonDualOp");
998 }
999 }
1000
1001 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1002 switch (op) {
1003 case ARMneon_TRN:
1004 case ARMneon_ZIP:
1005 case ARMneon_UZP:
1006 return "i";
1007 /* ... */
1008 default: vpanic("showARMNeonDualOp");
1009 }
1010 }
1011
1012 static const HChar* showARMNeonDataSize_wrk ( UInt size )
1013 {
1014 switch (size) {
1015 case 0: return "8";
1016 case 1: return "16";
1017 case 2: return "32";
1018 case 3: return "64";
1019 default: vpanic("showARMNeonDataSize");
1020 }
1021 }
1022
1023 static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1024 {
1025 switch (i->tag) {
1026 case ARMin_NBinary:
1027 if (i->ARMin.NBinary.op == ARMneon_VEXT)
1028 return "8";
1029 if (i->ARMin.NBinary.op == ARMneon_VAND ||
1030 i->ARMin.NBinary.op == ARMneon_VORR ||
1031 i->ARMin.NBinary.op == ARMneon_VXOR)
1032 return "";
1033 return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1034 case ARMin_NUnary:
1035 if (i->ARMin.NUnary.op == ARMneon_COPY ||
1036 i->ARMin.NUnary.op == ARMneon_NOT ||
1037 i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1038 i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1039 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1040 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1041 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1042 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1043 i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1044 i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1045 i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1046 i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1047 return "";
1048 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1049 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1050 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
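            /* For the VQSHLN ops, only the highest set bit of 'size'
               (0x40/0x20/0x10/0x08) determines the printed lane width;
               the lower-order bits, presumably the shift amount, are
               ignored here. */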
1051 UInt size;
1052 size = i->ARMin.NUnary.size;
1053 if (size & 0x40)
1054 return "64";
1055 if (size & 0x20)
1056 return "32";
1057 if (size & 0x10)
1058 return "16";
1059 if (size & 0x08)
1060 return "8";
1061 vpanic("showARMNeonDataSize");
1062 }
1063 return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1064 case ARMin_NUnaryS:
1065 if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1066 int size;
1067 size = i->ARMin.NUnaryS.size;
1068 if ((size & 1) == 1)
1069 return "8";
1070 if ((size & 3) == 2)
1071 return "16";
1072 if ((size & 7) == 4)
1073 return "32";
1074 vpanic("showARMNeonDataSize");
1075 }
1076 return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1077 case ARMin_NShift:
1078 return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1079 case ARMin_NDual:
1080 return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1081 default:
1082 vpanic("showARMNeonDataSize");
1083 }
1084 }
1085
1086 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1087 HReg dst, HReg argL, ARMRI84* argR ) {
1088 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1089 i->tag = ARMin_Alu;
1090 i->ARMin.Alu.op = op;
1091 i->ARMin.Alu.dst = dst;
1092 i->ARMin.Alu.argL = argL;
1093 i->ARMin.Alu.argR = argR;
1094 return i;
1095 }
1096 ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
1097 HReg dst, HReg argL, ARMRI5* argR ) {
1098 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1099 i->tag = ARMin_Shift;
1100 i->ARMin.Shift.op = op;
1101 i->ARMin.Shift.dst = dst;
1102 i->ARMin.Shift.argL = argL;
1103 i->ARMin.Shift.argR = argR;
1104 return i;
1105 }
1106 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1107 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1108 i->tag = ARMin_Unary;
1109 i->ARMin.Unary.op = op;
1110 i->ARMin.Unary.dst = dst;
1111 i->ARMin.Unary.src = src;
1112 return i;
1113 }
1114 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1115 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1116 i->tag = ARMin_CmpOrTst;
1117 i->ARMin.CmpOrTst.isCmp = isCmp;
1118 i->ARMin.CmpOrTst.argL = argL;
1119 i->ARMin.CmpOrTst.argR = argR;
1120 return i;
1121 }
1122 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1123 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1124 i->tag = ARMin_Mov;
1125 i->ARMin.Mov.dst = dst;
1126 i->ARMin.Mov.src = src;
1127 return i;
1128 }
1129 ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
1130 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1131 i->tag = ARMin_Imm32;
1132 i->ARMin.Imm32.dst = dst;
1133 i->ARMin.Imm32.imm32 = imm32;
1134 return i;
1135 }
1136 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1137 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1138 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1139 i->tag = ARMin_LdSt32;
1140 i->ARMin.LdSt32.cc = cc;
1141 i->ARMin.LdSt32.isLoad = isLoad;
1142 i->ARMin.LdSt32.rD = rD;
1143 i->ARMin.LdSt32.amode = amode;
1144 vassert(cc != ARMcc_NV);
1145 return i;
1146 }
1147 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1148 Bool isLoad, Bool signedLoad,
1149 HReg rD, ARMAMode2* amode ) {
1150 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1151 i->tag = ARMin_LdSt16;
1152 i->ARMin.LdSt16.cc = cc;
1153 i->ARMin.LdSt16.isLoad = isLoad;
1154 i->ARMin.LdSt16.signedLoad = signedLoad;
1155 i->ARMin.LdSt16.rD = rD;
1156 i->ARMin.LdSt16.amode = amode;
1157 vassert(cc != ARMcc_NV);
1158 return i;
1159 }
1160 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1161 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1162 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1163 i->tag = ARMin_LdSt8U;
1164 i->ARMin.LdSt8U.cc = cc;
1165 i->ARMin.LdSt8U.isLoad = isLoad;
1166 i->ARMin.LdSt8U.rD = rD;
1167 i->ARMin.LdSt8U.amode = amode;
1168 vassert(cc != ARMcc_NV);
1169 return i;
1170 }
1171 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1172 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1173 i->tag = ARMin_Ld8S;
1174 i->ARMin.Ld8S.cc = cc;
1175 i->ARMin.Ld8S.rD = rD;
1176 i->ARMin.Ld8S.amode = amode;
1177 vassert(cc != ARMcc_NV);
1178 return i;
1179 }
1180 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1181 ARMCondCode cond, Bool toFastEP ) {
1182 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1183 i->tag = ARMin_XDirect;
1184 i->ARMin.XDirect.dstGA = dstGA;
1185 i->ARMin.XDirect.amR15T = amR15T;
1186 i->ARMin.XDirect.cond = cond;
1187 i->ARMin.XDirect.toFastEP = toFastEP;
1188 return i;
1189 }
1190 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1191 ARMCondCode cond ) {
1192 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1193 i->tag = ARMin_XIndir;
1194 i->ARMin.XIndir.dstGA = dstGA;
1195 i->ARMin.XIndir.amR15T = amR15T;
1196 i->ARMin.XIndir.cond = cond;
1197 return i;
1198 }
1199 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1200 ARMCondCode cond, IRJumpKind jk ) {
1201 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1202 i->tag = ARMin_XAssisted;
1203 i->ARMin.XAssisted.dstGA = dstGA;
1204 i->ARMin.XAssisted.amR15T = amR15T;
1205 i->ARMin.XAssisted.cond = cond;
1206 i->ARMin.XAssisted.jk = jk;
1207 return i;
1208 }
1209 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1210 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1211 i->tag = ARMin_CMov;
1212 i->ARMin.CMov.cond = cond;
1213 i->ARMin.CMov.dst = dst;
1214 i->ARMin.CMov.src = src;
1215 vassert(cond != ARMcc_AL);
1216 return i;
1217 }
1218 ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1219 RetLoc rloc ) {
1220 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1221 i->tag = ARMin_Call;
1222 i->ARMin.Call.cond = cond;
1223 i->ARMin.Call.target = target;
1224 i->ARMin.Call.nArgRegs = nArgRegs;
1225 i->ARMin.Call.rloc = rloc;
1226 vassert(is_sane_RetLoc(rloc));
1227 return i;
1228 }
1229 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1230 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1231 i->tag = ARMin_Mul;
1232 i->ARMin.Mul.op = op;
1233 return i;
1234 }
1235 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1236 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237 i->tag = ARMin_LdrEX;
1238 i->ARMin.LdrEX.szB = szB;
1239 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1240 return i;
1241 }
1242 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1243 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1244 i->tag = ARMin_StrEX;
1245 i->ARMin.StrEX.szB = szB;
1246 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1247 return i;
1248 }
1249 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1250 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1251 i->tag = ARMin_VLdStD;
1252 i->ARMin.VLdStD.isLoad = isLoad;
1253 i->ARMin.VLdStD.dD = dD;
1254 i->ARMin.VLdStD.amode = am;
1255 return i;
1256 }
1257 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1258 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1259 i->tag = ARMin_VLdStS;
1260 i->ARMin.VLdStS.isLoad = isLoad;
1261 i->ARMin.VLdStS.fD = fD;
1262 i->ARMin.VLdStS.amode = am;
1263 return i;
1264 }
1265 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1266 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1267 i->tag = ARMin_VAluD;
1268 i->ARMin.VAluD.op = op;
1269 i->ARMin.VAluD.dst = dst;
1270 i->ARMin.VAluD.argL = argL;
1271 i->ARMin.VAluD.argR = argR;
1272 return i;
1273 }
1274 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1275 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1276 i->tag = ARMin_VAluS;
1277 i->ARMin.VAluS.op = op;
1278 i->ARMin.VAluS.dst = dst;
1279 i->ARMin.VAluS.argL = argL;
1280 i->ARMin.VAluS.argR = argR;
1281 return i;
1282 }
1283 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1284 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1285 i->tag = ARMin_VUnaryD;
1286 i->ARMin.VUnaryD.op = op;
1287 i->ARMin.VUnaryD.dst = dst;
1288 i->ARMin.VUnaryD.src = src;
1289 return i;
1290 }
1291 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1292 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1293 i->tag = ARMin_VUnaryS;
1294 i->ARMin.VUnaryS.op = op;
1295 i->ARMin.VUnaryS.dst = dst;
1296 i->ARMin.VUnaryS.src = src;
1297 return i;
1298 }
1299 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1300 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1301 i->tag = ARMin_VCmpD;
1302 i->ARMin.VCmpD.argL = argL;
1303 i->ARMin.VCmpD.argR = argR;
1304 return i;
1305 }
1306 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1307 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1308 i->tag = ARMin_VCMovD;
1309 i->ARMin.VCMovD.cond = cond;
1310 i->ARMin.VCMovD.dst = dst;
1311 i->ARMin.VCMovD.src = src;
1312 vassert(cond != ARMcc_AL);
1313 return i;
1314 }
1315 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1316 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1317 i->tag = ARMin_VCMovS;
1318 i->ARMin.VCMovS.cond = cond;
1319 i->ARMin.VCMovS.dst = dst;
1320 i->ARMin.VCMovS.src = src;
1321 vassert(cond != ARMcc_AL);
1322 return i;
1323 }
1324 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1325 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1326 i->tag = ARMin_VCvtSD;
1327 i->ARMin.VCvtSD.sToD = sToD;
1328 i->ARMin.VCvtSD.dst = dst;
1329 i->ARMin.VCvtSD.src = src;
1330 return i;
1331 }
1332 ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
1333 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1334 i->tag = ARMin_VXferQ;
1335 i->ARMin.VXferQ.toQ = toQ;
1336 i->ARMin.VXferQ.qD = qD;
1337 i->ARMin.VXferQ.dHi = dHi;
1338 i->ARMin.VXferQ.dLo = dLo;
1339 return i;
1340 }
1341 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1342 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1343 i->tag = ARMin_VXferD;
1344 i->ARMin.VXferD.toD = toD;
1345 i->ARMin.VXferD.dD = dD;
1346 i->ARMin.VXferD.rHi = rHi;
1347 i->ARMin.VXferD.rLo = rLo;
1348 return i;
1349 }
1350 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1351 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1352 i->tag = ARMin_VXferS;
1353 i->ARMin.VXferS.toS = toS;
1354 i->ARMin.VXferS.fD = fD;
1355 i->ARMin.VXferS.rLo = rLo;
1356 return i;
1357 }
1358 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1359 HReg dst, HReg src ) {
1360 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1361 i->tag = ARMin_VCvtID;
1362 i->ARMin.VCvtID.iToD = iToD;
1363 i->ARMin.VCvtID.syned = syned;
1364 i->ARMin.VCvtID.dst = dst;
1365 i->ARMin.VCvtID.src = src;
1366 return i;
1367 }
1368 ARMInstr* ARMInstr_VRIntR ( Bool isF64, HReg dst, HReg src )
1369 {
1370 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1371 i->tag = ARMin_VRIntR;
1372 i->ARMin.VRIntR.isF64 = isF64;
1373 i->ARMin.VRIntR.dst = dst ;
1374 i->ARMin.VRIntR.src = src;
1375 return i;
1376 }
1377 ARMInstr* ARMInstr_VMinMaxNum ( Bool isF64, Bool isMax,
1378 HReg dst, HReg srcL, HReg srcR )
1379 {
1380 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1381 i->tag = ARMin_VMinMaxNum;
1382 i->ARMin.VMinMaxNum.isF64 = isF64;
1383 i->ARMin.VMinMaxNum.isMax = isMax;
1384 i->ARMin.VMinMaxNum.dst = dst ;
1385 i->ARMin.VMinMaxNum.srcL = srcL;
1386 i->ARMin.VMinMaxNum.srcR = srcR;
1387 return i;
1388 }
1389 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1390 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1391 i->tag = ARMin_FPSCR;
1392 i->ARMin.FPSCR.toFPSCR = toFPSCR;
1393 i->ARMin.FPSCR.iReg = iReg;
1394 return i;
1395 }
1396 ARMInstr* ARMInstr_MFence ( void ) {
1397 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1398 i->tag = ARMin_MFence;
1399 return i;
1400 }
1401 ARMInstr* ARMInstr_CLREX( void ) {
1402 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1403 i->tag = ARMin_CLREX;
1404 return i;
1405 }
1406
1407 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1408 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1409 i->tag = ARMin_NLdStQ;
1410 i->ARMin.NLdStQ.isLoad = isLoad;
1411 i->ARMin.NLdStQ.dQ = dQ;
1412 i->ARMin.NLdStQ.amode = amode;
1413 return i;
1414 }
1415
1416 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1417 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1418 i->tag = ARMin_NLdStD;
1419 i->ARMin.NLdStD.isLoad = isLoad;
1420 i->ARMin.NLdStD.dD = dD;
1421 i->ARMin.NLdStD.amode = amode;
1422 return i;
1423 }
1424
1425 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1426 UInt size, Bool Q ) {
1427 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1428 i->tag = ARMin_NUnary;
1429 i->ARMin.NUnary.op = op;
1430 i->ARMin.NUnary.src = nQ;
1431 i->ARMin.NUnary.dst = dQ;
1432 i->ARMin.NUnary.size = size;
1433 i->ARMin.NUnary.Q = Q;
1434 return i;
1435 }
1436
1437 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1438 UInt size, Bool Q ) {
1439 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1440 i->tag = ARMin_NUnaryS;
1441 i->ARMin.NUnaryS.op = op;
1442 i->ARMin.NUnaryS.src = src;
1443 i->ARMin.NUnaryS.dst = dst;
1444 i->ARMin.NUnaryS.size = size;
1445 i->ARMin.NUnaryS.Q = Q;
1446 return i;
1447 }
1448
1449 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1450 UInt size, Bool Q ) {
1451 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1452 i->tag = ARMin_NDual;
1453 i->ARMin.NDual.op = op;
1454 i->ARMin.NDual.arg1 = nQ;
1455 i->ARMin.NDual.arg2 = mQ;
1456 i->ARMin.NDual.size = size;
1457 i->ARMin.NDual.Q = Q;
1458 return i;
1459 }
1460
1461 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1462 HReg dst, HReg argL, HReg argR,
1463 UInt size, Bool Q ) {
1464 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1465 i->tag = ARMin_NBinary;
1466 i->ARMin.NBinary.op = op;
1467 i->ARMin.NBinary.argL = argL;
1468 i->ARMin.NBinary.argR = argR;
1469 i->ARMin.NBinary.dst = dst;
1470 i->ARMin.NBinary.size = size;
1471 i->ARMin.NBinary.Q = Q;
1472 return i;
1473 }
1474
1475 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1476 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1477 i->tag = ARMin_NeonImm;
1478 i->ARMin.NeonImm.dst = dst;
1479 i->ARMin.NeonImm.imm = imm;
1480 return i;
1481 }
1482
1483 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1484 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1485 i->tag = ARMin_NCMovQ;
1486 i->ARMin.NCMovQ.cond = cond;
1487 i->ARMin.NCMovQ.dst = dst;
1488 i->ARMin.NCMovQ.src = src;
1489 vassert(cond != ARMcc_AL);
1490 return i;
1491 }
1492
1493 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1494 HReg dst, HReg argL, HReg argR,
1495 UInt size, Bool Q ) {
1496 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1497 i->tag = ARMin_NShift;
1498 i->ARMin.NShift.op = op;
1499 i->ARMin.NShift.argL = argL;
1500 i->ARMin.NShift.argR = argR;
1501 i->ARMin.NShift.dst = dst;
1502 i->ARMin.NShift.size = size;
1503 i->ARMin.NShift.Q = Q;
1504 return i;
1505 }
1506
1507 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1508 {
1509 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1510 i->tag = ARMin_NShl64;
1511 i->ARMin.NShl64.dst = dst;
1512 i->ARMin.NShl64.src = src;
1513 i->ARMin.NShl64.amt = amt;
1514 vassert(amt >= 1 && amt <= 63);
1515 return i;
1516 }
1517
1518 /* Helper copy-pasted from isel.c */
1519 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1520 {
1521 UInt i;
1522 for (i = 0; i < 16; i++) {
1523 if (0 == (u & 0xFFFFFF00)) {
1524 *u8 = u;
1525 *u4 = i;
1526 return True;
1527 }
1528 u = ROR32(u, 30);
1529 }
1530 vassert(i == 16);
1531 return False;
1532 }
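/* Example: 0x3FC is accepted, with *u8 = 0xFF and *u4 = 15, since
   ROR32(0xFF, 2*15) == 0x3FC; a value like 0x101 is rejected because its
   set bits cannot be brought into a single byte by any even rotation. */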
1533
1534 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1535 UInt u8, u4;
1536 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1537 /* Try to generate single ADD if possible */
1538 if (fitsIn8x4(&u8, &u4, imm32)) {
1539 i->tag = ARMin_Alu;
1540 i->ARMin.Alu.op = ARMalu_ADD;
1541 i->ARMin.Alu.dst = rD;
1542 i->ARMin.Alu.argL = rN;
1543 i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1544 } else {
1545 i->tag = ARMin_Add32;
1546 i->ARMin.Add32.rD = rD;
1547 i->ARMin.Add32.rN = rN;
1548 i->ARMin.Add32.imm32 = imm32;
1549 }
1550 return i;
1551 }
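/* For instance, ARMInstr_Add32(rD, rN, 0x3FC) degenerates to a single
   "add rD, rN, #0x3FC", while an immediate with no 8x4 encoding (such as
   0x12345) keeps the ARMin_Add32 form to be expanded at emission time. */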
1552
1553 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1554 ARMAMode1* amFailAddr ) {
1555 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1556 i->tag = ARMin_EvCheck;
1557 i->ARMin.EvCheck.amCounter = amCounter;
1558 i->ARMin.EvCheck.amFailAddr = amFailAddr;
1559 return i;
1560 }
1561
1562 ARMInstr* ARMInstr_ProfInc ( void ) {
1563 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1564 i->tag = ARMin_ProfInc;
1565 return i;
1566 }
1567
1568 /* ... */
1569
1570 void ppARMInstr ( const ARMInstr* i ) {
1571 switch (i->tag) {
1572 case ARMin_Alu:
1573 vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
1574 ppHRegARM(i->ARMin.Alu.dst);
1575 vex_printf(", ");
1576 ppHRegARM(i->ARMin.Alu.argL);
1577 vex_printf(", ");
1578 ppARMRI84(i->ARMin.Alu.argR);
1579 return;
1580 case ARMin_Shift:
1581 vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
1582 ppHRegARM(i->ARMin.Shift.dst);
1583 vex_printf(", ");
1584 ppHRegARM(i->ARMin.Shift.argL);
1585 vex_printf(", ");
1586 ppARMRI5(i->ARMin.Shift.argR);
1587 return;
1588 case ARMin_Unary:
1589 vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
1590 ppHRegARM(i->ARMin.Unary.dst);
1591 vex_printf(", ");
1592 ppHRegARM(i->ARMin.Unary.src);
1593 return;
1594 case ARMin_CmpOrTst:
1595 vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1596 ppHRegARM(i->ARMin.CmpOrTst.argL);
1597 vex_printf(", ");
1598 ppARMRI84(i->ARMin.CmpOrTst.argR);
1599 return;
1600 case ARMin_Mov:
1601 vex_printf("mov ");
1602 ppHRegARM(i->ARMin.Mov.dst);
1603 vex_printf(", ");
1604 ppARMRI84(i->ARMin.Mov.src);
1605 return;
1606 case ARMin_Imm32:
1607 vex_printf("imm ");
1608 ppHRegARM(i->ARMin.Imm32.dst);
1609 vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1610 return;
1611 case ARMin_LdSt32:
1612 if (i->ARMin.LdSt32.isLoad) {
1613 vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1614 : showARMCondCode(i->ARMin.LdSt32.cc));
1615 ppHRegARM(i->ARMin.LdSt32.rD);
1616 vex_printf(", ");
1617 ppARMAMode1(i->ARMin.LdSt32.amode);
1618 } else {
1619 vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1620 : showARMCondCode(i->ARMin.LdSt32.cc));
1621 ppARMAMode1(i->ARMin.LdSt32.amode);
1622 vex_printf(", ");
1623 ppHRegARM(i->ARMin.LdSt32.rD);
1624 }
1625 return;
1626 case ARMin_LdSt16:
1627 if (i->ARMin.LdSt16.isLoad) {
1628 vex_printf("%s%s%s",
1629 i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1630 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1631 : showARMCondCode(i->ARMin.LdSt16.cc),
1632 i->ARMin.LdSt16.signedLoad ? " " : " ");
1633 ppHRegARM(i->ARMin.LdSt16.rD);
1634 vex_printf(", ");
1635 ppARMAMode2(i->ARMin.LdSt16.amode);
1636 } else {
1637 vex_printf("strh%s ",
1638 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1639 : showARMCondCode(i->ARMin.LdSt16.cc));
1640 ppARMAMode2(i->ARMin.LdSt16.amode);
1641 vex_printf(", ");
1642 ppHRegARM(i->ARMin.LdSt16.rD);
1643 }
1644 return;
1645 case ARMin_LdSt8U:
1646 if (i->ARMin.LdSt8U.isLoad) {
1647 vex_printf("ldrb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1648 : showARMCondCode(i->ARMin.LdSt8U.cc));
1649 ppHRegARM(i->ARMin.LdSt8U.rD);
1650 vex_printf(", ");
1651 ppARMAMode1(i->ARMin.LdSt8U.amode);
1652 } else {
1653 vex_printf("strb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1654 : showARMCondCode(i->ARMin.LdSt8U.cc));
1655 ppARMAMode1(i->ARMin.LdSt8U.amode);
1656 vex_printf(", ");
1657 ppHRegARM(i->ARMin.LdSt8U.rD);
1658 }
1659 return;
1660 case ARMin_Ld8S:
1661 vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? " "
1662 : showARMCondCode(i->ARMin.Ld8S.cc));
1663 ppARMAMode2(i->ARMin.Ld8S.amode);
1664 vex_printf(", ");
1665 ppHRegARM(i->ARMin.Ld8S.rD);
1666 return;
1667 case ARMin_XDirect:
1668 vex_printf("(xDirect) ");
1669 vex_printf("if (%%cpsr.%s) { ",
1670 showARMCondCode(i->ARMin.XDirect.cond));
1671 vex_printf("movw r12,0x%x; ",
1672 (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1673 vex_printf("movt r12,0x%x; ",
1674 (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1675 vex_printf("str r12,");
1676 ppARMAMode1(i->ARMin.XDirect.amR15T);
1677 vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1678 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1679 vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1680 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1681 vex_printf("blx r12 }");
1682 return;
1683 case ARMin_XIndir:
1684 vex_printf("(xIndir) ");
1685 vex_printf("if (%%cpsr.%s) { ",
1686 showARMCondCode(i->ARMin.XIndir.cond));
1687 vex_printf("str ");
1688 ppHRegARM(i->ARMin.XIndir.dstGA);
1689 vex_printf(",");
1690 ppARMAMode1(i->ARMin.XIndir.amR15T);
1691 vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1692 vex_printf("movt r12,HI16($disp_cp_xindir); ");
1693 vex_printf("blx r12 }");
1694 return;
1695 case ARMin_XAssisted:
1696 vex_printf("(xAssisted) ");
1697 vex_printf("if (%%cpsr.%s) { ",
1698 showARMCondCode(i->ARMin.XAssisted.cond));
1699 vex_printf("str ");
1700 ppHRegARM(i->ARMin.XAssisted.dstGA);
1701 vex_printf(",");
1702 ppARMAMode1(i->ARMin.XAssisted.amR15T);
1703 vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1704 (Int)i->ARMin.XAssisted.jk);
1705 vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1706 vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1707 vex_printf("blx r12 }");
1708 return;
1709 case ARMin_CMov:
1710 vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1711 ppHRegARM(i->ARMin.CMov.dst);
1712 vex_printf(", ");
1713 ppARMRI84(i->ARMin.CMov.src);
1714 return;
1715 case ARMin_Call:
1716 vex_printf("call%s ",
1717 i->ARMin.Call.cond==ARMcc_AL
1718 ? "" : showARMCondCode(i->ARMin.Call.cond));
1719 vex_printf("0x%x [nArgRegs=%d, ",
1720 i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1721 ppRetLoc(i->ARMin.Call.rloc);
1722 vex_printf("]");
1723 return;
1724 case ARMin_Mul:
1725 vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1726 if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1727 vex_printf("r0, r2, r3");
1728 } else {
1729 vex_printf("r1:r0, r2, r3");
1730 }
1731 return;
1732 case ARMin_LdrEX: {
1733 const HChar* sz = "";
1734 switch (i->ARMin.LdrEX.szB) {
1735 case 1: sz = "b"; break; case 2: sz = "h"; break;
1736 case 8: sz = "d"; break; case 4: break;
1737 default: vassert(0);
1738 }
1739 vex_printf("ldrex%s %sr2, [r4]",
1740 sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1741 return;
1742 }
1743 case ARMin_StrEX: {
1744 const HChar* sz = "";
1745 switch (i->ARMin.StrEX.szB) {
1746 case 1: sz = "b"; break; case 2: sz = "h"; break;
1747 case 8: sz = "d"; break; case 4: break;
1748 default: vassert(0);
1749 }
1750 vex_printf("strex%s r0, %sr2, [r4]",
1751 sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1752 return;
1753 }
1754 case ARMin_VLdStD:
1755 if (i->ARMin.VLdStD.isLoad) {
1756 vex_printf("fldd ");
1757 ppHRegARM(i->ARMin.VLdStD.dD);
1758 vex_printf(", ");
1759 ppARMAModeV(i->ARMin.VLdStD.amode);
1760 } else {
1761 vex_printf("fstd ");
1762 ppARMAModeV(i->ARMin.VLdStD.amode);
1763 vex_printf(", ");
1764 ppHRegARM(i->ARMin.VLdStD.dD);
1765 }
1766 return;
1767 case ARMin_VLdStS:
1768 if (i->ARMin.VLdStS.isLoad) {
1769 vex_printf("flds ");
1770 ppHRegARM(i->ARMin.VLdStS.fD);
1771 vex_printf(", ");
1772 ppARMAModeV(i->ARMin.VLdStS.amode);
1773 } else {
1774 vex_printf("fsts ");
1775 ppARMAModeV(i->ARMin.VLdStS.amode);
1776 vex_printf(", ");
1777 ppHRegARM(i->ARMin.VLdStS.fD);
1778 }
1779 return;
1780 case ARMin_VAluD:
1781 vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1782 ppHRegARM(i->ARMin.VAluD.dst);
1783 vex_printf(", ");
1784 ppHRegARM(i->ARMin.VAluD.argL);
1785 vex_printf(", ");
1786 ppHRegARM(i->ARMin.VAluD.argR);
1787 return;
1788 case ARMin_VAluS:
1789 vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1790 ppHRegARM(i->ARMin.VAluS.dst);
1791 vex_printf(", ");
1792 ppHRegARM(i->ARMin.VAluS.argL);
1793 vex_printf(", ");
1794 ppHRegARM(i->ARMin.VAluS.argR);
1795 return;
1796 case ARMin_VUnaryD:
1797 vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1798 ppHRegARM(i->ARMin.VUnaryD.dst);
1799 vex_printf(", ");
1800 ppHRegARM(i->ARMin.VUnaryD.src);
1801 return;
1802 case ARMin_VUnaryS:
1803 vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1804 ppHRegARM(i->ARMin.VUnaryS.dst);
1805 vex_printf(", ");
1806 ppHRegARM(i->ARMin.VUnaryS.src);
1807 return;
1808 case ARMin_VCmpD:
1809 vex_printf("fcmpd ");
1810 ppHRegARM(i->ARMin.VCmpD.argL);
1811 vex_printf(", ");
1812 ppHRegARM(i->ARMin.VCmpD.argR);
1813 vex_printf(" ; fmstat");
1814 return;
1815 case ARMin_VCMovD:
1816 vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1817 ppHRegARM(i->ARMin.VCMovD.dst);
1818 vex_printf(", ");
1819 ppHRegARM(i->ARMin.VCMovD.src);
1820 return;
1821 case ARMin_VCMovS:
1822 vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1823 ppHRegARM(i->ARMin.VCMovS.dst);
1824 vex_printf(", ");
1825 ppHRegARM(i->ARMin.VCMovS.src);
1826 return;
1827 case ARMin_VCvtSD:
1828 vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1829 ppHRegARM(i->ARMin.VCvtSD.dst);
1830 vex_printf(", ");
1831 ppHRegARM(i->ARMin.VCvtSD.src);
1832 return;
1833 case ARMin_VXferQ:
1834 if (i->ARMin.VXferQ.toQ) {
1835 vex_printf("vmov ");
1836 ppHRegARM(i->ARMin.VXferQ.qD);
1837 vex_printf("-lo64, ");
1838 ppHRegARM(i->ARMin.VXferQ.dLo);
1839 vex_printf(" ; vmov ");
1840 ppHRegARM(i->ARMin.VXferQ.qD);
1841 vex_printf("-hi64, ");
1842 ppHRegARM(i->ARMin.VXferQ.dHi);
1843 } else {
1844 vex_printf("vmov ");
1845 ppHRegARM(i->ARMin.VXferQ.dLo);
1846 vex_printf(", ");
1847 ppHRegARM(i->ARMin.VXferQ.qD);
1848 vex_printf("-lo64");
1849 vex_printf(" ; vmov ");
1850 ppHRegARM(i->ARMin.VXferQ.dHi);
1851 vex_printf(", ");
1852 ppHRegARM(i->ARMin.VXferQ.qD);
1853 vex_printf("-hi64");
1854 }
1855 return;
1856 case ARMin_VXferD:
1857 vex_printf("vmov ");
1858 if (i->ARMin.VXferD.toD) {
1859 ppHRegARM(i->ARMin.VXferD.dD);
1860 vex_printf(", ");
1861 ppHRegARM(i->ARMin.VXferD.rLo);
1862 vex_printf(", ");
1863 ppHRegARM(i->ARMin.VXferD.rHi);
1864 } else {
1865 ppHRegARM(i->ARMin.VXferD.rLo);
1866 vex_printf(", ");
1867 ppHRegARM(i->ARMin.VXferD.rHi);
1868 vex_printf(", ");
1869 ppHRegARM(i->ARMin.VXferD.dD);
1870 }
1871 return;
1872 case ARMin_VXferS:
1873 vex_printf("vmov ");
1874 if (i->ARMin.VXferS.toS) {
1875 ppHRegARM(i->ARMin.VXferS.fD);
1876 vex_printf(", ");
1877 ppHRegARM(i->ARMin.VXferS.rLo);
1878 } else {
1879 ppHRegARM(i->ARMin.VXferS.rLo);
1880 vex_printf(", ");
1881 ppHRegARM(i->ARMin.VXferS.fD);
1882 }
1883 return;
1884 case ARMin_VCvtID: {
1885 const HChar* nm = "?";
1886 if (i->ARMin.VCvtID.iToD) {
1887 nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1888 } else {
1889 nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1890 }
1891 vex_printf("%s ", nm);
1892 ppHRegARM(i->ARMin.VCvtID.dst);
1893 vex_printf(", ");
1894 ppHRegARM(i->ARMin.VCvtID.src);
1895 return;
1896 }
1897 case ARMin_VRIntR: {
1898 const HChar* sz = i->ARMin.VRIntR.isF64 ? "f64" : "f32";
1899 vex_printf("vrintr.%s.%s ", sz, sz);
1900 ppHRegARM(i->ARMin.VRIntR.dst);
1901 vex_printf(", ");
1902 ppHRegARM(i->ARMin.VRIntR.src);
1903 return;
1904 }
1905 case ARMin_VMinMaxNum: {
1906 const HChar* sz = i->ARMin.VMinMaxNum.isF64 ? "f64" : "f32";
1907 const HChar* nm = i->ARMin.VMinMaxNum.isMax ? "vmaxnm" : "vminnm";
1908 vex_printf("%s.%s ", nm, sz);
1909 ppHRegARM(i->ARMin.VMinMaxNum.dst);
1910 vex_printf(", ");
1911 ppHRegARM(i->ARMin.VMinMaxNum.srcL);
1912 vex_printf(", ");
1913 ppHRegARM(i->ARMin.VMinMaxNum.srcR);
1914 return;
1915 }
1916 case ARMin_FPSCR:
1917 if (i->ARMin.FPSCR.toFPSCR) {
1918 vex_printf("fmxr fpscr, ");
1919 ppHRegARM(i->ARMin.FPSCR.iReg);
1920 } else {
1921 vex_printf("fmrx ");
1922 ppHRegARM(i->ARMin.FPSCR.iReg);
1923 vex_printf(", fpscr");
1924 }
1925 return;
1926 case ARMin_MFence:
1927 vex_printf("(mfence) dsb sy; dmb sy; isb");
1928 return;
1929 case ARMin_CLREX:
1930 vex_printf("clrex");
1931 return;
1932 case ARMin_NLdStQ:
1933 if (i->ARMin.NLdStQ.isLoad)
1934 vex_printf("vld1.32 {");
1935 else
1936 vex_printf("vst1.32 {");
1937 ppHRegARM(i->ARMin.NLdStQ.dQ);
1938 vex_printf("} ");
1939 ppARMAModeN(i->ARMin.NLdStQ.amode);
1940 return;
1941 case ARMin_NLdStD:
1942 if (i->ARMin.NLdStD.isLoad)
1943 vex_printf("vld1.32 {");
1944 else
1945 vex_printf("vst1.32 {");
1946 ppHRegARM(i->ARMin.NLdStD.dD);
1947 vex_printf("} ");
1948 ppARMAModeN(i->ARMin.NLdStD.amode);
1949 return;
1950 case ARMin_NUnary:
1951 vex_printf("%s%s%s ",
1952 showARMNeonUnOp(i->ARMin.NUnary.op),
1953 showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1954 showARMNeonDataSize(i));
1955 ppHRegARM(i->ARMin.NUnary.dst);
1956 vex_printf(", ");
1957 ppHRegARM(i->ARMin.NUnary.src);
1958 if (i->ARMin.NUnary.op == ARMneon_EQZ)
1959 vex_printf(", #0");
1960 if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1961 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1962 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1963 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1964 vex_printf(", #%u", i->ARMin.NUnary.size);
1965 }
1966 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1967 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1968 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1969 UInt size;
1970 size = i->ARMin.NUnary.size;
1971 if (size & 0x40) {
1972 vex_printf(", #%u", size - 64);
1973 } else if (size & 0x20) {
1974 vex_printf(", #%u", size - 32);
1975 } else if (size & 0x10) {
1976 vex_printf(", #%u", size - 16);
1977 } else if (size & 0x08) {
1978 vex_printf(", #%u", size - 8);
1979 }
1980 }
1981 return;
1982 case ARMin_NUnaryS:
1983 vex_printf("%s%s%s ",
1984 showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1985 showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1986 showARMNeonDataSize(i));
1987 ppARMNRS(i->ARMin.NUnaryS.dst);
1988 vex_printf(", ");
1989 ppARMNRS(i->ARMin.NUnaryS.src);
1990 return;
1991 case ARMin_NShift:
1992 vex_printf("%s%s%s ",
1993 showARMNeonShiftOp(i->ARMin.NShift.op),
1994 showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1995 showARMNeonDataSize(i));
1996 ppHRegARM(i->ARMin.NShift.dst);
1997 vex_printf(", ");
1998 ppHRegARM(i->ARMin.NShift.argL);
1999 vex_printf(", ");
2000 ppHRegARM(i->ARMin.NShift.argR);
2001 return;
2002 case ARMin_NShl64:
2003 vex_printf("vshl.i64 ");
2004 ppHRegARM(i->ARMin.NShl64.dst);
2005 vex_printf(", ");
2006 ppHRegARM(i->ARMin.NShl64.src);
2007 vex_printf(", #%u", i->ARMin.NShl64.amt);
2008 return;
2009 case ARMin_NDual:
2010 vex_printf("%s%s%s ",
2011 showARMNeonDualOp(i->ARMin.NDual.op),
2012 showARMNeonDualOpDataType(i->ARMin.NDual.op),
2013 showARMNeonDataSize(i));
2014 ppHRegARM(i->ARMin.NDual.arg1);
2015 vex_printf(", ");
2016 ppHRegARM(i->ARMin.NDual.arg2);
2017 return;
2018 case ARMin_NBinary:
2019 vex_printf("%s%s%s",
2020 showARMNeonBinOp(i->ARMin.NBinary.op),
2021 showARMNeonBinOpDataType(i->ARMin.NBinary.op),
2022 showARMNeonDataSize(i));
2023 vex_printf(" ");
2024 ppHRegARM(i->ARMin.NBinary.dst);
2025 vex_printf(", ");
2026 ppHRegARM(i->ARMin.NBinary.argL);
2027 vex_printf(", ");
2028 ppHRegARM(i->ARMin.NBinary.argR);
2029 return;
2030 case ARMin_NeonImm:
2031 vex_printf("vmov ");
2032 ppHRegARM(i->ARMin.NeonImm.dst);
2033 vex_printf(", ");
2034 ppARMNImm(i->ARMin.NeonImm.imm);
2035 return;
2036 case ARMin_NCMovQ:
2037 vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
2038 ppHRegARM(i->ARMin.NCMovQ.dst);
2039 vex_printf(", ");
2040 ppHRegARM(i->ARMin.NCMovQ.src);
2041 return;
2042 case ARMin_Add32:
2043 vex_printf("add32 ");
2044 ppHRegARM(i->ARMin.Add32.rD);
2045 vex_printf(", ");
2046 ppHRegARM(i->ARMin.Add32.rN);
2047 vex_printf(", ");
2048 vex_printf("%u", i->ARMin.Add32.imm32);
2049 return;
2050 case ARMin_EvCheck:
2051 vex_printf("(evCheck) ldr r12,");
2052 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2053 vex_printf("; subs r12,r12,$1; str r12,");
2054 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2055 vex_printf("; bpl nofail; ldr r12,");
2056 ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
2057 vex_printf("; bx r12; nofail:");
2058 return;
2059 case ARMin_ProfInc:
2060 vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2061 "movw r12,HI16($NotKnownYet); "
2062 "ldr r11,[r12]; "
2063 "adds r11,r11,$1; "
2064 "str r11,[r12]; "
2065 "ldr r11,[r12+4]; "
2066 "adc r11,r11,$0; "
2067 "str r11,[r12+4]");
2068 return;
2069 default:
2070 vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2071 vpanic("ppARMInstr(1)");
2072 return;
2073 }
2074 }
2075
2076
2077 /* --------- Helpers for register allocation. --------- */
2078
2079 void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2080 {
2081 vassert(mode64 == False);
2082 initHRegUsage(u);
2083 switch (i->tag) {
2084 case ARMin_Alu:
2085 addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2086 addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2087 addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2088 return;
2089 case ARMin_Shift:
2090 addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2091 addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2092 addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2093 return;
2094 case ARMin_Unary:
2095 addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2096 addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2097 return;
2098 case ARMin_CmpOrTst:
2099 addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2100 addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2101 return;
2102 case ARMin_Mov:
2103 addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2104 addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2105 return;
2106 case ARMin_Imm32:
2107 addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2108 return;
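/* Note on the conditional load cases below: if the condition fails,
   rD keeps its old value, so a conditional load is treated as both
   reading and writing rD. */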
2109 case ARMin_LdSt32:
2110 addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2111 if (i->ARMin.LdSt32.isLoad) {
2112 addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2113 if (i->ARMin.LdSt32.cc != ARMcc_AL)
2114 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2115 } else {
2116 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2117 }
2118 return;
2119 case ARMin_LdSt16:
2120 addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2121 if (i->ARMin.LdSt16.isLoad) {
2122 addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2123 if (i->ARMin.LdSt16.cc != ARMcc_AL)
2124 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2125 } else {
2126 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2127 }
2128 return;
2129 case ARMin_LdSt8U:
2130 addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2131 if (i->ARMin.LdSt8U.isLoad) {
2132 addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2133 if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2134 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2135 } else {
2136 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2137 }
2138 return;
2139 case ARMin_Ld8S:
2140 addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2141 addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2142 if (i->ARMin.Ld8S.cc != ARMcc_AL)
2143 addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2144 return;
2145 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2146 conditionally exit the block. Hence we only need to list (1)
2147 the registers that they read, and (2) the registers that they
2148 write in the case where the block is not exited. (2) is
2149 empty, hence only (1) is relevant here. */
2150 case ARMin_XDirect:
2151 addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2152 return;
2153 case ARMin_XIndir:
2154 addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2155 addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2156 return;
2157 case ARMin_XAssisted:
2158 addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2159 addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2160 return;
2161 case ARMin_CMov:
2162 addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2163 addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
2164 addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2165 return;
2166 case ARMin_Call:
2167 /* logic and comments copied/modified from x86 back end */
2168 /* This is a bit subtle. */
2169 /* First off, claim it trashes all the caller-saved regs
2170 which fall within the register allocator's jurisdiction.
2171 These I believe to be r0,1,2,3. If it turns out that r9
2172 is also caller-saved, then we'll have to add that here
2173 too. */
2174 addHRegUse(u, HRmWrite, hregARM_R0());
2175 addHRegUse(u, HRmWrite, hregARM_R1());
2176 addHRegUse(u, HRmWrite, hregARM_R2());
2177 addHRegUse(u, HRmWrite, hregARM_R3());
2178 /* Now we have to state any parameter-carrying registers
2179 which might be read. This depends on nArgRegs. */
2180 switch (i->ARMin.Call.nArgRegs) {
2181 case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2182 case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2183 case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2184 case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2185 case 0: break;
2186 default: vpanic("getRegUsage_ARM:Call:regparms");
2187 }
2188 /* Finally, there is the issue that the insn trashes a
2189 register because the literal target address has to be
2190 loaded into a register. Fortunately, for the nArgRegs=
2191 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2192 this does not cause any further damage. For the
2193 nArgRegs=4 case, we'll have to choose another register
2194 arbitrarily since all the caller saved regs are used for
2195 parameters, and so we might as well choose r11.
2196 */
2197 if (i->ARMin.Call.nArgRegs == 4)
2198 addHRegUse(u, HRmWrite, hregARM_R11());
2199 /* Upshot of this is that the assembler really must observe
2200 the here-stated convention of which register to use as an
2201 address temporary, depending on nArgRegs: 0==r0,
2202 1==r1, 2==r2, 3==r3, 4==r11 */
2203 return;
2204 case ARMin_Mul:
2205 addHRegUse(u, HRmRead, hregARM_R2());
2206 addHRegUse(u, HRmRead, hregARM_R3());
2207 addHRegUse(u, HRmWrite, hregARM_R0());
2208 if (i->ARMin.Mul.op != ARMmul_PLAIN)
2209 addHRegUse(u, HRmWrite, hregARM_R1());
2210 return;
2211 case ARMin_LdrEX:
2212 addHRegUse(u, HRmRead, hregARM_R4());
2213 addHRegUse(u, HRmWrite, hregARM_R2());
2214 if (i->ARMin.LdrEX.szB == 8)
2215 addHRegUse(u, HRmWrite, hregARM_R3());
2216 return;
2217 case ARMin_StrEX:
2218 addHRegUse(u, HRmRead, hregARM_R4());
2219 addHRegUse(u, HRmWrite, hregARM_R0());
2220 addHRegUse(u, HRmRead, hregARM_R2());
2221 if (i->ARMin.StrEX.szB == 8)
2222 addHRegUse(u, HRmRead, hregARM_R3());
2223 return;
2224 case ARMin_VLdStD:
2225 addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2226 if (i->ARMin.VLdStD.isLoad) {
2227 addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2228 } else {
2229 addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2230 }
2231 return;
2232 case ARMin_VLdStS:
2233 addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2234 if (i->ARMin.VLdStS.isLoad) {
2235 addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2236 } else {
2237 addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2238 }
2239 return;
2240 case ARMin_VAluD:
2241 addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2242 addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2243 addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2244 return;
2245 case ARMin_VAluS:
2246 addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2247 addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2248 addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2249 return;
2250 case ARMin_VUnaryD:
2251 addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2252 addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2253 return;
2254 case ARMin_VUnaryS:
2255 addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2256 addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2257 return;
2258 case ARMin_VCmpD:
2259 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2260 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2261 return;
2262 case ARMin_VCMovD:
2263 addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2264 addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2265 addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2266 return;
2267 case ARMin_VCMovS:
2268 addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2269 addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2270 addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2271 return;
2272 case ARMin_VCvtSD:
2273 addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2274 addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
2275 return;
2276 case ARMin_VXferQ:
2277 if (i->ARMin.VXferQ.toQ) {
2278 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
2279 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dHi);
2280 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dLo);
2281 } else {
2282 addHRegUse(u, HRmRead, i->ARMin.VXferQ.qD);
2283 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
2284 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
2285 }
2286 return;
2287 case ARMin_VXferD:
2288 if (i->ARMin.VXferD.toD) {
2289 addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2290 addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2291 addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2292 } else {
2293 addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2294 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2295 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2296 }
2297 return;
2298 case ARMin_VXferS:
2299 if (i->ARMin.VXferS.toS) {
2300 addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2301 addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2302 } else {
2303 addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2304 addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2305 }
2306 return;
2307 case ARMin_VCvtID:
2308 addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2309 addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2310 return;
2311 case ARMin_VRIntR:
2312 addHRegUse(u, HRmWrite, i->ARMin.VRIntR.dst);
2313 addHRegUse(u, HRmRead, i->ARMin.VRIntR.src);
2314 return;
2315 case ARMin_VMinMaxNum:
2316 addHRegUse(u, HRmWrite, i->ARMin.VMinMaxNum.dst);
2317 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcL);
2318 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcR);
2319 return;
2320 case ARMin_FPSCR:
2321 if (i->ARMin.FPSCR.toFPSCR)
2322 addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2323 else
2324 addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2325 return;
2326 case ARMin_MFence:
2327 return;
2328 case ARMin_CLREX:
2329 return;
2330 case ARMin_NLdStQ:
2331 if (i->ARMin.NLdStQ.isLoad)
2332 addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2333 else
2334 addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2335 addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2336 return;
2337 case ARMin_NLdStD:
2338 if (i->ARMin.NLdStD.isLoad)
2339 addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2340 else
2341 addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2342 addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2343 return;
2344 case ARMin_NUnary:
2345 addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2346 addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2347 return;
2348 case ARMin_NUnaryS:
2349 addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2350 addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2351 return;
2352 case ARMin_NShift:
2353 addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2354 addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2355 addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2356 return;
2357 case ARMin_NShl64:
2358 addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2359 addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2360 return;
2361 case ARMin_NDual:
2362 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2363 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2364 addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2365 addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2366 return;
2367 case ARMin_NBinary:
2368 addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2369 /* TODO: sometimes dst is also being read! */
2370 // XXX fix this
2371 addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2372 addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2373 return;
2374 case ARMin_NeonImm:
2375 addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2376 return;
2377 case ARMin_NCMovQ:
2378 addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2379 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2380 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2381 return;
2382 case ARMin_Add32:
2383 addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2384 addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2385 return;
2386 case ARMin_EvCheck:
2387 /* We expect both amodes only to mention r8, so this is in
2388 fact pointless, since r8 isn't allocatable, but
2389 anyway.. */
2390 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2391 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2392 addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2393 return;
2394 case ARMin_ProfInc:
2395 addHRegUse(u, HRmWrite, hregARM_R12());
2396 addHRegUse(u, HRmWrite, hregARM_R11());
2397 return;
2398 default:
2399 ppARMInstr(i);
2400 vpanic("getRegUsage_ARMInstr");
2401 }
2402 }
2403
2404
2405 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2406 {
2407 vassert(mode64 == False);
2408 switch (i->tag) {
2409 case ARMin_Alu:
2410 i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2411 i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2412 mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2413 return;
2414 case ARMin_Shift:
2415 i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2416 i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2417 mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2418 return;
2419 case ARMin_Unary:
2420 i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2421 i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2422 return;
2423 case ARMin_CmpOrTst:
2424 i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2425 mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2426 return;
2427 case ARMin_Mov:
2428 i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2429 mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2430 return;
2431 case ARMin_Imm32:
2432 i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2433 return;
2434 case ARMin_LdSt32:
2435 i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2436 mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2437 return;
2438 case ARMin_LdSt16:
2439 i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2440 mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2441 return;
2442 case ARMin_LdSt8U:
2443 i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2444 mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2445 return;
2446 case ARMin_Ld8S:
2447 i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2448 mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2449 return;
2450 case ARMin_XDirect:
2451 mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2452 return;
2453 case ARMin_XIndir:
2454 i->ARMin.XIndir.dstGA
2455 = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2456 mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2457 return;
2458 case ARMin_XAssisted:
2459 i->ARMin.XAssisted.dstGA
2460 = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2461 mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2462 return;
2463 case ARMin_CMov:
2464 i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2465 mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2466 return;
2467 case ARMin_Call:
2468 return;
2469 case ARMin_Mul:
2470 return;
2471 case ARMin_LdrEX:
2472 return;
2473 case ARMin_StrEX:
2474 return;
2475 case ARMin_VLdStD:
2476 i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2477 mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2478 return;
2479 case ARMin_VLdStS:
2480 i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2481 mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2482 return;
2483 case ARMin_VAluD:
2484 i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2485 i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2486 i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2487 return;
2488 case ARMin_VAluS:
2489 i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2490 i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2491 i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2492 return;
2493 case ARMin_VUnaryD:
2494 i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2495 i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2496 return;
2497 case ARMin_VUnaryS:
2498 i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2499 i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2500 return;
2501 case ARMin_VCmpD:
2502 i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2503 i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2504 return;
2505 case ARMin_VCMovD:
2506 i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2507 i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2508 return;
2509 case ARMin_VCMovS:
2510 i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2511 i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2512 return;
2513 case ARMin_VCvtSD:
2514 i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2515 i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2516 return;
2517 case ARMin_VXferQ:
2518 i->ARMin.VXferQ.qD = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
2519 i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
2520 i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
2521 return;
2522 case ARMin_VXferD:
2523 i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2524 i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2525 i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2526 return;
2527 case ARMin_VXferS:
2528 i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2529 i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2530 return;
2531 case ARMin_VCvtID:
2532 i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2533 i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2534 return;
2535 case ARMin_VRIntR:
2536 i->ARMin.VRIntR.dst = lookupHRegRemap(m, i->ARMin.VRIntR.dst);
2537 i->ARMin.VRIntR.src = lookupHRegRemap(m, i->ARMin.VRIntR.src);
2538 return;
2539 case ARMin_VMinMaxNum:
2540 i->ARMin.VMinMaxNum.dst
2541 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.dst);
2542 i->ARMin.VMinMaxNum.srcL
2543 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcL);
2544 i->ARMin.VMinMaxNum.srcR
2545 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcR);
2546 return;
2547 case ARMin_FPSCR:
2548 i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2549 return;
2550 case ARMin_MFence:
2551 return;
2552 case ARMin_CLREX:
2553 return;
2554 case ARMin_NLdStQ:
2555 i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2556 mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2557 return;
2558 case ARMin_NLdStD:
2559 i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2560 mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2561 return;
2562 case ARMin_NUnary:
2563 i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2564 i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2565 return;
2566 case ARMin_NUnaryS:
2567 i->ARMin.NUnaryS.src->reg
2568 = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2569 i->ARMin.NUnaryS.dst->reg
2570 = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2571 return;
2572 case ARMin_NShift:
2573 i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2574 i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2575 i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2576 return;
2577 case ARMin_NShl64:
2578 i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2579 i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2580 return;
2581 case ARMin_NDual:
2582 i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2583 i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2584 return;
2585 case ARMin_NBinary:
2586 i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2587 i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2588 i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2589 return;
2590 case ARMin_NeonImm:
2591 i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2592 return;
2593 case ARMin_NCMovQ:
2594 i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2595 i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2596 return;
2597 case ARMin_Add32:
2598 i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2599 i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2600 return;
2601 case ARMin_EvCheck:
2602 /* We expect both amodes only to mention r8, so this is in
2603 fact pointless, since r8 isn't allocatable, but
2604 anyway.. */
2605 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2606 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2607 return;
2608 case ARMin_ProfInc:
2609 /* hardwires r11 and r12 -- nothing to modify. */
2610 return;
2611 default:
2612 ppARMInstr(i);
2613 vpanic("mapRegs_ARMInstr");
2614 }
2615 }
2616
2617 /* Figure out if i represents a reg-reg move, and if so assign the
2618 source and destination to *src and *dst. If in doubt say No. Used
2619 by the register allocator to do move coalescing.
2620 */
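/* E.g. an ARMin_Mov whose source is the plain-register (ARMri84_R) form
   qualifies, as do the VFP/NEON COPY unaries; immediate moves do not. */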
2621 Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
2622 {
2623 /* Moves between integer regs */
2624 switch (i->tag) {
2625 case ARMin_Mov:
2626 if (i->ARMin.Mov.src->tag == ARMri84_R) {
2627 *src = i->ARMin.Mov.src->ARMri84.R.reg;
2628 *dst = i->ARMin.Mov.dst;
2629 return True;
2630 }
2631 break;
2632 case ARMin_VUnaryD:
2633 if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2634 *src = i->ARMin.VUnaryD.src;
2635 *dst = i->ARMin.VUnaryD.dst;
2636 return True;
2637 }
2638 break;
2639 case ARMin_VUnaryS:
2640 if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2641 *src = i->ARMin.VUnaryS.src;
2642 *dst = i->ARMin.VUnaryS.dst;
2643 return True;
2644 }
2645 break;
2646 case ARMin_NUnary:
2647 if (i->ARMin.NUnary.op == ARMneon_COPY) {
2648 *src = i->ARMin.NUnary.src;
2649 *dst = i->ARMin.NUnary.dst;
2650 return True;
2651 }
2652 break;
2653 default:
2654 break;
2655 }
2656
2657 return False;
2658 }
2659
2660
2661 /* Generate arm spill/reload instructions under the direction of the
2662 register allocator. Note it's critical these don't write the
2663 condition codes. */
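/* As a rough sketch of what this produces: an HRcInt32 rreg spilled at
   offsetB becomes "str rreg, [r8, #offsetB]"; an HRcFlt32/HRcFlt64 rreg
   becomes a VFP store (going via r12 when offsetB >= 1024); and an
   HRcVec128 rreg becomes "add32 r12, r8, #offsetB ; vst1.32 {qreg}, [r12]".
   Reloads are the mirror image, using loads.  r8 is the baseblock pointer
   and r12 the spill temporary. */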
2664
2665 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2666 HReg rreg, Int offsetB, Bool mode64 )
2667 {
2668 HRegClass rclass;
2669 vassert(offsetB >= 0);
2670 vassert(!hregIsVirtual(rreg));
2671 vassert(mode64 == False);
2672 *i1 = *i2 = NULL;
2673 rclass = hregClass(rreg);
2674 switch (rclass) {
2675 case HRcInt32:
2676 vassert(offsetB <= 4095);
2677 *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2678 rreg,
2679 ARMAMode1_RI(hregARM_R8(), offsetB) );
2680 return;
2681 case HRcFlt32:
2682 case HRcFlt64: {
2683 HReg r8 = hregARM_R8(); /* baseblock */
2684 HReg r12 = hregARM_R12(); /* spill temp */
2685 HReg base = r8;
2686 vassert(0 == (offsetB & 3));
2687 if (offsetB >= 1024) {
2688 Int offsetKB = offsetB / 1024;
2689 /* r12 = r8 + (1024 * offsetKB) */
2690 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2691 ARMRI84_I84(offsetKB, 11));
2692 offsetB -= (1024 * offsetKB);
2693 base = r12;
2694 }
2695 vassert(offsetB <= 1020);
2696 if (rclass == HRcFlt32) {
2697 *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2698 rreg,
2699 mkARMAModeV(base, offsetB) );
2700 } else {
2701 *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2702 rreg,
2703 mkARMAModeV(base, offsetB) );
2704 }
2705 return;
2706 }
2707 case HRcVec128: {
2708 HReg r8 = hregARM_R8();
2709 HReg r12 = hregARM_R12();
2710 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2711 *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2712 return;
2713 }
2714 default:
2715 ppHRegClass(rclass);
2716 vpanic("genSpill_ARM: unimplemented regclass");
2717 }
2718 }
2719
2720 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2721 HReg rreg, Int offsetB, Bool mode64 )
2722 {
2723 HRegClass rclass;
2724 vassert(offsetB >= 0);
2725 vassert(!hregIsVirtual(rreg));
2726 vassert(mode64 == False);
2727 *i1 = *i2 = NULL;
2728 rclass = hregClass(rreg);
2729 switch (rclass) {
2730 case HRcInt32:
2731 vassert(offsetB <= 4095);
2732 *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2733 rreg,
2734 ARMAMode1_RI(hregARM_R8(), offsetB) );
2735 return;
2736 case HRcFlt32:
2737 case HRcFlt64: {
2738 HReg r8 = hregARM_R8(); /* baseblock */
2739 HReg r12 = hregARM_R12(); /* spill temp */
2740 HReg base = r8;
2741 vassert(0 == (offsetB & 3));
2742 if (offsetB >= 1024) {
2743 Int offsetKB = offsetB / 1024;
2744 /* r12 = r8 + (1024 * offsetKB) */
2745 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2746 ARMRI84_I84(offsetKB, 11));
2747 offsetB -= (1024 * offsetKB);
2748 base = r12;
2749 }
2750 vassert(offsetB <= 1020);
2751 if (rclass == HRcFlt32) {
2752 *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2753 rreg,
2754 mkARMAModeV(base, offsetB) );
2755 } else {
2756 *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2757 rreg,
2758 mkARMAModeV(base, offsetB) );
2759 }
2760 return;
2761 }
2762 case HRcVec128: {
2763 HReg r8 = hregARM_R8();
2764 HReg r12 = hregARM_R12();
2765 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2766 *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2767 return;
2768 }
2769 default:
2770 ppHRegClass(rclass);
2771 vpanic("genReload_ARM: unimplemented regclass");
2772 }
2773 }
2774
2775
2776 /* Emit an instruction into buf and return the number of bytes used.
2777 Note that buf is not the insn's final place, and therefore it is
2778 imperative to emit position-independent code. */
2779
2780 static inline UInt iregEnc ( HReg r )
2781 {
2782 UInt n;
2783 vassert(hregClass(r) == HRcInt32);
2784 vassert(!hregIsVirtual(r));
2785 n = hregEncoding(r);
2786 vassert(n <= 15);
2787 return n;
2788 }
2789
2790 static inline UInt dregEnc ( HReg r )
2791 {
2792 UInt n;
2793 vassert(hregClass(r) == HRcFlt64);
2794 vassert(!hregIsVirtual(r));
2795 n = hregEncoding(r);
2796 vassert(n <= 31);
2797 return n;
2798 }
2799
2800 static inline UInt fregEnc ( HReg r )
2801 {
2802 UInt n;
2803 vassert(hregClass(r) == HRcFlt32);
2804 vassert(!hregIsVirtual(r));
2805 n = hregEncoding(r);
2806 vassert(n <= 31);
2807 return n;
2808 }
2809
2810 static inline UInt qregEnc ( HReg r )
2811 {
2812 UInt n;
2813 vassert(hregClass(r) == HRcVec128);
2814 vassert(!hregIsVirtual(r));
2815 n = hregEncoding(r);
2816 vassert(n <= 15);
2817 return n;
2818 }
2819
2820 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2821 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2822 #define X0000 BITS4(0,0,0,0)
2823 #define X0001 BITS4(0,0,0,1)
2824 #define X0010 BITS4(0,0,1,0)
2825 #define X0011 BITS4(0,0,1,1)
2826 #define X0100 BITS4(0,1,0,0)
2827 #define X0101 BITS4(0,1,0,1)
2828 #define X0110 BITS4(0,1,1,0)
2829 #define X0111 BITS4(0,1,1,1)
2830 #define X1000 BITS4(1,0,0,0)
2831 #define X1001 BITS4(1,0,0,1)
2832 #define X1010 BITS4(1,0,1,0)
2833 #define X1011 BITS4(1,0,1,1)
2834 #define X1100 BITS4(1,1,0,0)
2835 #define X1101 BITS4(1,1,0,1)
2836 #define X1110 BITS4(1,1,1,0)
2837 #define X1111 BITS4(1,1,1,1)
2838
2839 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2840 (((((UInt)(zzx7)) & 0xF) << 28) | \
2841 (((zzx6) & 0xF) << 24) | \
2842 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2843 (((zzx3) & 0xF) << 12))
2844
2845 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2846 (((((UInt)(zzx7)) & 0xF) << 28) | \
2847 (((zzx6) & 0xF) << 24) | \
2848 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2849 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2850
2851 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2852 (((((UInt)(zzx7)) & 0xF) << 28) | \
2853 (((zzx6) & 0xF) << 24) | \
2854 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2855 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2856
2857 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2858 (((((UInt)(zzx7)) & 0xF) << 28) | \
2859 (((zzx6) & 0xF) << 24) | \
2860 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2861 (((zzx0) & 0xF) << 0))
2862
2863 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2864 (((((UInt)(zzx7)) & 0xF) << 28) | \
2865 (((zzx6) & 0xF) << 24) | \
2866 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2867 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2868 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2869
2870 #define XX______(zzx7,zzx6) \
2871 (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
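/* Worked example (each argument supplies one hex nibble, most significant
   first): XXXXXXXX(0xE,0x3,0x0,0x1,0x2,0x3,0x4,0x5) == 0xE3012345.  The
   partially-blank variants above simply leave the omitted nibbles zero. */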
2872
2873 /* Generate a skeletal insn that involves an RI84 shifter operand.
2874 Returns a word which is all zeroes apart from bits 25 and 11..0,
2875 since it is those that encode the shifter operand (at least to the
2876 extent that we care about it.) */
2877 static UInt skeletal_RI84 ( ARMRI84* ri )
2878 {
2879 UInt instr;
2880 if (ri->tag == ARMri84_I84) {
2881 vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2882 vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2883 instr = 1 << 25;
2884 instr |= (ri->ARMri84.I84.imm4 << 8);
2885 instr |= ri->ARMri84.I84.imm8;
2886 } else {
2887 instr = 0 << 25;
2888 instr |= iregEnc(ri->ARMri84.R.reg);
2889 }
2890 return instr;
2891 }
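/* In the final data-processing instruction the imm4 field (bits 11..8) is
   the rotate amount, so the encoded value is imm8 rotated right by 2*imm4.
   E.g. I84(imm8=1, imm4=11) denotes 1 ror 22 == 0x400; the spill/reload
   code above relies on this to add 1024*offsetKB in one instruction. */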
2892
2893 /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
2894 11..7. */
2895 static UInt skeletal_RI5 ( ARMRI5* ri )
2896 {
2897 UInt instr;
2898 if (ri->tag == ARMri5_I5) {
2899 UInt imm5 = ri->ARMri5.I5.imm5;
2900 vassert(imm5 >= 1 && imm5 <= 31);
2901 instr = 0 << 4;
2902 instr |= imm5 << 7;
2903 } else {
2904 instr = 1 << 4;
2905 instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2906 }
2907 return instr;
2908 }
2909
2910
2911 /* Get an immediate into a register, using only that
2912 register. (very lame..) */
2913 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2914 {
2915 UInt instr;
2916 vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2917 #if 0
2918 if (0 == (imm32 & ~0xFF)) {
2919 /* mov with an immediate shifter operand of (0, imm32) (??) */
2920 instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2921 instr |= imm32;
2922 *p++ = instr;
2923 } else {
2924 // this is very bad; causes Dcache pollution
2925 // ldr rD, [pc]
2926 instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2927 *p++ = instr;
2928 // b .+8
2929 instr = 0xEA000000;
2930 *p++ = instr;
2931 // .word imm32
2932 *p++ = imm32;
2933 }
2934 #else
2935 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2936 /* Generate movw rD, #low16. Then, if the high 16 are
2937 nonzero, generate movt rD, #high16. */
2938 UInt lo16 = imm32 & 0xFFFF;
2939 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2940 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2941 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2942 lo16 & 0xF);
2943 *p++ = instr;
2944 if (hi16 != 0) {
2945 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2946 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2947 hi16 & 0xF);
2948 *p++ = instr;
2949 }
2950 } else {
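/* Pre-v7 fallback: build the value a byte at a time using MOV and ORR with
   rotated immediates.  E.g. (sketch) imm32 == 0x12345678 would give:
   mov rD,#0x78 ; orr rD,rD,#0x12000000 ; orr rD,rD,#0x340000 ;
   orr rD,rD,#0x5600. */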
2951 UInt imm, rot;
2952 UInt op = X1010;
2953 UInt rN = 0;
2954 if ((imm32 & 0xFF) || (imm32 == 0)) {
2955 imm = imm32 & 0xFF;
2956 rot = 0;
2957 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2958 *p++ = instr;
2959 op = X1000;
2960 rN = rD;
2961 }
2962 if (imm32 & 0xFF000000) {
2963 imm = (imm32 >> 24) & 0xFF;
2964 rot = 4;
2965 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2966 *p++ = instr;
2967 op = X1000;
2968 rN = rD;
2969 }
2970 if (imm32 & 0xFF0000) {
2971 imm = (imm32 >> 16) & 0xFF;
2972 rot = 8;
2973 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2974 *p++ = instr;
2975 op = X1000;
2976 rN = rD;
2977 }
2978 if (imm32 & 0xFF00) {
2979 imm = (imm32 >> 8) & 0xFF;
2980 rot = 12;
2981 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2982 *p++ = instr;
2983 op = X1000;
2984 rN = rD;
2985 }
2986 }
2987 #endif
2988 return p;
2989 }
2990
2991 /* Get an immediate into a register, using only that register, and
2992 generating exactly 2 instructions, regardless of the value of the
2993 immediate. This is used when generating sections of code that need
2994 to be patched later, so as to guarantee a specific size. */
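/* E.g. on a v7+ host, imm32 == 0x12345678 always assembles to exactly
   "movw rD,#0x5678 ; movt rD,#0x1234". */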
2995 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2996 {
2997 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2998 /* Generate movw rD, #low16 ; movt rD, #high16. */
2999 UInt lo16 = imm32 & 0xFFFF;
3000 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3001 UInt instr;
3002 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3003 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3004 lo16 & 0xF);
3005 *p++ = instr;
3006 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3007 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3008 hi16 & 0xF);
3009 *p++ = instr;
3010 } else {
3011 vassert(0); /* lose */
3012 }
3013 return p;
3014 }
3015
3016 /* Check whether p points at a 2-insn sequence cooked up by
3017 imm32_to_ireg_EXACTLY2(). */
3018 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
3019 {
3020 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
3021 /* Generate movw rD, #low16 ; movt rD, #high16. */
3022 UInt lo16 = imm32 & 0xFFFF;
3023 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3024 UInt i0, i1;
3025 i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3026 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3027 lo16 & 0xF);
3028 i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3029 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3030 hi16 & 0xF);
3031 return p[0] == i0 && p[1] == i1;
3032 } else {
3033 vassert(0); /* lose */
3034 }
3035 }
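/* Presumably this is what the chaining/unchaining logic (referred to in the
   ARMin_XDirect case below) uses to check that a patch site still contains
   the expected movw/movt pair before rewriting it. */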
3036
3037
3038 static UInt* do_load_or_store32 ( UInt* p,
3039 Bool isLoad, UInt rD, ARMAMode1* am )
3040 {
3041 vassert(rD <= 12);
3042 vassert(am->tag == ARMam1_RI); // RR case is not handled
3043 UInt bB = 0;
3044 UInt bL = isLoad ? 1 : 0;
3045 Int simm12;
3046 UInt instr, bP;
3047 if (am->ARMam1.RI.simm13 < 0) {
3048 bP = 0;
3049 simm12 = -am->ARMam1.RI.simm13;
3050 } else {
3051 bP = 1;
3052 simm12 = am->ARMam1.RI.simm13;
3053 }
3054 vassert(simm12 >= 0 && simm12 <= 4095);
3055 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
3056 iregEnc(am->ARMam1.RI.reg),
3057 rD);
3058 instr |= simm12;
3059 *p++ = instr;
3060 return p;
3061 }
3062
3063
3064 /* Emit an instruction into buf and return the number of bytes used.
3065 Note that buf is not the insn's final place, and therefore it is
3066 imperative to emit position-independent code. If the emitted
3067 instruction was a profiler inc, set *is_profInc to True, else
3068 leave it unchanged. */
3069
3070 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
3071 UChar* buf, Int nbuf, const ARMInstr* i,
3072 Bool mode64, VexEndness endness_host,
3073 const void* disp_cp_chain_me_to_slowEP,
3074 const void* disp_cp_chain_me_to_fastEP,
3075 const void* disp_cp_xindir,
3076 const void* disp_cp_xassisted )
3077 {
3078 UInt* p = (UInt*)buf;
3079 vassert(nbuf >= 32);
3080 vassert(mode64 == False);
3081 vassert(0 == (((HWord)buf) & 3));
3082
3083 switch (i->tag) {
3084 case ARMin_Alu: {
3085 UInt instr, subopc;
3086 UInt rD = iregEnc(i->ARMin.Alu.dst);
3087 UInt rN = iregEnc(i->ARMin.Alu.argL);
3088 ARMRI84* argR = i->ARMin.Alu.argR;
3089 switch (i->ARMin.Alu.op) {
3090 case ARMalu_ADDS: /* fallthru */
3091 case ARMalu_ADD: subopc = X0100; break;
3092 case ARMalu_ADC: subopc = X0101; break;
3093 case ARMalu_SUBS: /* fallthru */
3094 case ARMalu_SUB: subopc = X0010; break;
3095 case ARMalu_SBC: subopc = X0110; break;
3096 case ARMalu_AND: subopc = X0000; break;
3097 case ARMalu_BIC: subopc = X1110; break;
3098 case ARMalu_OR: subopc = X1100; break;
3099 case ARMalu_XOR: subopc = X0001; break;
3100 default: goto bad;
3101 }
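/* Data-processing layout: bit 25 and the shifter operand (bits 11..0) come
   from the skeleton; subopc's top bit lands in bit 24 and its low three
   bits in bits 23..21, with the S bit (bit 20) set separately below for
   ADDS/SUBS. */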
3102 instr = skeletal_RI84(argR);
3103 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3104 (subopc << 1) & 0xF, rN, rD);
3105 if (i->ARMin.Alu.op == ARMalu_ADDS
3106 || i->ARMin.Alu.op == ARMalu_SUBS) {
3107 instr |= 1<<20; /* set the S bit */
3108 }
3109 *p++ = instr;
3110 goto done;
3111 }
3112 case ARMin_Shift: {
3113 UInt instr, subopc;
3114 UInt rD = iregEnc(i->ARMin.Shift.dst);
3115 UInt rM = iregEnc(i->ARMin.Shift.argL);
3116 ARMRI5* argR = i->ARMin.Shift.argR;
3117 switch (i->ARMin.Shift.op) {
3118 case ARMsh_SHL: subopc = X0000; break;
3119 case ARMsh_SHR: subopc = X0001; break;
3120 case ARMsh_SAR: subopc = X0010; break;
3121 default: goto bad;
3122 }
3123 instr = skeletal_RI5(argR);
3124 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3125 instr |= (subopc & 3) << 5;
3126 *p++ = instr;
3127 goto done;
3128 }
3129 case ARMin_Unary: {
3130 UInt instr;
3131 UInt rDst = iregEnc(i->ARMin.Unary.dst);
3132 UInt rSrc = iregEnc(i->ARMin.Unary.src);
3133 switch (i->ARMin.Unary.op) {
3134 case ARMun_CLZ:
3135 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3136 rDst,X1111,X0001,rSrc);
3137 *p++ = instr;
3138 goto done;
3139 case ARMun_NEG: /* RSB rD,rS,#0 */
3140 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3141 *p++ = instr;
3142 goto done;
3143 case ARMun_NOT: {
3144 UInt subopc = X1111; /* MVN */
3145 instr = rSrc;
3146 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3147 (subopc << 1) & 0xF, 0, rDst);
3148 *p++ = instr;
3149 goto done;
3150 }
3151 default:
3152 break;
3153 }
3154 goto bad;
3155 }
3156 case ARMin_CmpOrTst: {
3157 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3158 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3159 UInt SBZ = 0;
3160 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3161 ((subopc << 1) & 0xF) | 1,
3162 iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3163 *p++ = instr;
3164 goto done;
3165 }
3166 case ARMin_Mov: {
3167 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
3168 UInt subopc = X1101; /* MOV */
3169 UInt SBZ = 0;
3170 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3171 (subopc << 1) & 0xF, SBZ,
3172 iregEnc(i->ARMin.Mov.dst));
3173 *p++ = instr;
3174 goto done;
3175 }
3176 case ARMin_Imm32: {
3177 p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3178 i->ARMin.Imm32.imm32 );
3179 goto done;
3180 }
3181 case ARMin_LdSt32:
3182 case ARMin_LdSt8U: {
3183 UInt bL, bB;
3184 HReg rD;
3185 ARMAMode1* am;
3186 ARMCondCode cc;
3187 if (i->tag == ARMin_LdSt32) {
3188 bB = 0;
3189 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3190 am = i->ARMin.LdSt32.amode;
3191 rD = i->ARMin.LdSt32.rD;
3192 cc = i->ARMin.LdSt32.cc;
3193 } else {
3194 bB = 1;
3195 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3196 am = i->ARMin.LdSt8U.amode;
3197 rD = i->ARMin.LdSt8U.rD;
3198 cc = i->ARMin.LdSt8U.cc;
3199 }
3200 vassert(cc != ARMcc_NV);
3201 if (am->tag == ARMam1_RI) {
3202 Int simm12;
3203 UInt instr, bP;
3204 if (am->ARMam1.RI.simm13 < 0) {
3205 bP = 0;
3206 simm12 = -am->ARMam1.RI.simm13;
3207 } else {
3208 bP = 1;
3209 simm12 = am->ARMam1.RI.simm13;
3210 }
3211 vassert(simm12 >= 0 && simm12 <= 4095);
3212 instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3213 iregEnc(am->ARMam1.RI.reg),
3214 iregEnc(rD));
3215 instr |= simm12;
3216 *p++ = instr;
3217 goto done;
3218 } else {
3219 // RR case
3220 goto bad;
3221 }
3222 }
3223 case ARMin_LdSt16: {
3224 HReg rD = i->ARMin.LdSt16.rD;
3225 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3226 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3227 ARMAMode2* am = i->ARMin.LdSt16.amode;
3228 ARMCondCode cc = i->ARMin.LdSt16.cc;
3229 vassert(cc != ARMcc_NV);
3230 if (am->tag == ARMam2_RI) {
3231 HReg rN = am->ARMam2.RI.reg;
3232 Int simm8;
3233 UInt bP, imm8hi, imm8lo, instr;
3234 if (am->ARMam2.RI.simm9 < 0) {
3235 bP = 0;
3236 simm8 = -am->ARMam2.RI.simm9;
3237 } else {
3238 bP = 1;
3239 simm8 = am->ARMam2.RI.simm9;
3240 }
3241 vassert(simm8 >= 0 && simm8 <= 255);
3242 imm8hi = (simm8 >> 4) & 0xF;
3243 imm8lo = simm8 & 0xF;
3244 vassert(!(bL == 0 && bS == 1)); // "! signed store"
3245 /**/ if (bL == 0 && bS == 0) {
3246 // strh
3247 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3248 iregEnc(rD), imm8hi, X1011, imm8lo);
3249 *p++ = instr;
3250 goto done;
3251 }
3252 else if (bL == 1 && bS == 0) {
3253 // ldrh
3254 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3255 iregEnc(rD), imm8hi, X1011, imm8lo);
3256 *p++ = instr;
3257 goto done;
3258 }
3259 else if (bL == 1 && bS == 1) {
3260 // ldrsh
3261 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3262 iregEnc(rD), imm8hi, X1111, imm8lo);
3263 *p++ = instr;
3264 goto done;
3265 }
3266 else vassert(0); // ill-constructed insn
3267 } else {
3268 // RR case
3269 goto bad;
3270 }
3271 }
3272 case ARMin_Ld8S: {
3273 HReg rD = i->ARMin.Ld8S.rD;
3274 ARMAMode2* am = i->ARMin.Ld8S.amode;
3275 ARMCondCode cc = i->ARMin.Ld8S.cc;
3276 vassert(cc != ARMcc_NV);
3277 if (am->tag == ARMam2_RI) {
3278 HReg rN = am->ARMam2.RI.reg;
3279 Int simm8;
3280 UInt bP, imm8hi, imm8lo, instr;
3281 if (am->ARMam2.RI.simm9 < 0) {
3282 bP = 0;
3283 simm8 = -am->ARMam2.RI.simm9;
3284 } else {
3285 bP = 1;
3286 simm8 = am->ARMam2.RI.simm9;
3287 }
3288 vassert(simm8 >= 0 && simm8 <= 255);
3289 imm8hi = (simm8 >> 4) & 0xF;
3290 imm8lo = simm8 & 0xF;
3291 // ldrsb
3292 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3293 iregEnc(rD), imm8hi, X1101, imm8lo);
3294 *p++ = instr;
3295 goto done;
3296 } else {
3297 // RR case
3298 goto bad;
3299 }
3300 }
3301
3302 case ARMin_XDirect: {
3303 /* NB: what goes on here has to be very closely coordinated
3304 with the chainXDirect_ARM and unchainXDirect_ARM below. */
3305 /* We're generating chain-me requests here, so we need to be
3306 sure this is actually allowed -- no-redir translations
3307 can't use chain-me's. Hence: */
3308 vassert(disp_cp_chain_me_to_slowEP != NULL);
3309 vassert(disp_cp_chain_me_to_fastEP != NULL);
3310
3311 /* Use ptmp for backpatching conditional jumps. */
3312 UInt* ptmp = NULL;
3313
3314 /* First off, if this is conditional, create a conditional
3315 jump over the rest of it. Or at least, leave a space for
3316 it that we will shortly fill in. */
3317 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3318 vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3319 ptmp = p;
3320 *p++ = 0;
3321 }
3322
3323 /* Update the guest R15T. */
3324 /* movw r12, lo16(dstGA) */
3325 /* movt r12, hi16(dstGA) */
3326 /* str r12, amR15T */
3327 p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3328 p = do_load_or_store32(p, False/*!isLoad*/,
3329 /*r*/12, i->ARMin.XDirect.amR15T);
3330
3331 /* --- FIRST PATCHABLE BYTE follows --- */
3332 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3333 calling to) backs up the return address, so as to find the
3334 address of the first patchable byte. So: don't change the
3335 number of instructions (3) below. */
3336 /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3337 /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3338 /* blx r12 (A1) */
3339 const void* disp_cp_chain_me
3340 = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3341 : disp_cp_chain_me_to_slowEP;
3342 p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3343 (UInt)(Addr)disp_cp_chain_me);
3344 *p++ = 0xE12FFF3C;
3345 /* --- END of PATCHABLE BYTES --- */
3346
3347 /* Fix up the conditional jump, if there was one. */
3348 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3349 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3350 vassert(delta > 0 && delta < 40);
3351 vassert((delta & 3) == 0);
3352 UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3353 vassert(notCond <= 13); /* Neither AL nor NV */
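/* The B-instruction's 24-bit offset is counted in words and is relative to
   the branch address + 8, hence the conversion: bytes/4 minus 2. */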
3354 delta = (delta >> 2) - 2;
3355 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3356 }
3357 goto done;
3358 }
3359
3360 case ARMin_XIndir: {
3361 /* We're generating transfers that could lead indirectly to a
3362 chain-me, so we need to be sure this is actually allowed
3363 -- no-redir translations are not allowed to reach normal
3364 translations without going through the scheduler. That
3365 means no XDirects or XIndirs out from no-redir
3366 translations. Hence: */
3367 vassert(disp_cp_xindir != NULL);
3368
3369 /* Use ptmp for backpatching conditional jumps. */
3370 UInt* ptmp = NULL;
3371
3372 /* First off, if this is conditional, create a conditional
3373 jump over the rest of it. Or at least, leave a space for
3374 it that we will shortly fill in. */
3375 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3376 vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3377 ptmp = p;
3378 *p++ = 0;
3379 }
3380
3381 /* Update the guest R15T. */
3382 /* str r-dstGA, amR15T */
3383 p = do_load_or_store32(p, False/*!isLoad*/,
3384 iregEnc(i->ARMin.XIndir.dstGA),
3385 i->ARMin.XIndir.amR15T);
3386
3387 /* movw r12, lo16(VG_(disp_cp_xindir)) */
3388 /* movt r12, hi16(VG_(disp_cp_xindir)) */
3389 /* bx r12 (A1) */
3390 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3391 *p++ = 0xE12FFF1C;
3392
3393 /* Fix up the conditional jump, if there was one. */
3394 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3395 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3396 vassert(delta > 0 && delta < 40);
3397 vassert((delta & 3) == 0);
3398 UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3399 vassert(notCond <= 13); /* Neither AL nor NV */
3400 delta = (delta >> 2) - 2;
3401 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3402 }
3403 goto done;
3404 }
3405
3406 case ARMin_XAssisted: {
3407 /* Use ptmp for backpatching conditional jumps. */
3408 UInt* ptmp = NULL;
3409
3410 /* First off, if this is conditional, create a conditional
3411 jump over the rest of it. Or at least, leave a space for
3412 it that we will shortly fill in. */
3413 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3414 vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3415 ptmp = p;
3416 *p++ = 0;
3417 }
3418
3419 /* Update the guest R15T. */
3420 /* str r-dstGA, amR15T */
3421 p = do_load_or_store32(p, False/*!isLoad*/,
3422 iregEnc(i->ARMin.XAssisted.dstGA),
3423 i->ARMin.XAssisted.amR15T);
3424
3425 /* movw r8, $magic_number */
3426 UInt trcval = 0;
3427 switch (i->ARMin.XAssisted.jk) {
3428 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3429 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3430 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3431 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3432 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3433 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3434 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3435 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3436 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3437 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3438 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3439 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3440 /* We don't expect to see the following being assisted. */
3441 //case Ijk_Ret:
3442 //case Ijk_Call:
3443 /* fallthrough */
3444 default:
3445 ppIRJumpKind(i->ARMin.XAssisted.jk);
3446 vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3447 }
3448 vassert(trcval != 0);
3449 p = imm32_to_ireg(p, /*r*/8, trcval);
3450
3451 /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3452 /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3453 /* bx r12 (A1) */
3454 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3455 *p++ = 0xE12FFF1C;
3456
3457 /* Fix up the conditional jump, if there was one. */
3458 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3459 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3460 vassert(delta > 0 && delta < 40);
3461 vassert((delta & 3) == 0);
3462 UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3463 vassert(notCond <= 13); /* Neither AL nor NV */
3464 delta = (delta >> 2) - 2;
3465 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3466 }
3467 goto done;
3468 }
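      /* Sketch of the mechanism: the constant loaded into r8 is a
         VEX_TRC_JMP_* code.  The assisted-exit stub (disp_cp_xassisted)
         is expected to hand that value back to the outer dispatch loop,
         which is how the caller finds out why this translation exited
         (syscall, client request, etc.). */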
3469
3470 case ARMin_CMov: {
3471 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
3472 UInt subopc = X1101; /* MOV */
3473 UInt SBZ = 0;
3474 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3475 (subopc << 1) & 0xF, SBZ,
3476 iregEnc(i->ARMin.CMov.dst));
3477 *p++ = instr;
3478 goto done;
3479 }
3480
3481 case ARMin_Call: {
3482 UInt instr;
3483          /* Decide on a scratch reg used to hold the call address.
3484 This has to be done as per the comments in getRegUsage. */
3485 Int scratchNo;
3486 switch (i->ARMin.Call.nArgRegs) {
3487 case 0: scratchNo = 0; break;
3488 case 1: scratchNo = 1; break;
3489 case 2: scratchNo = 2; break;
3490 case 3: scratchNo = 3; break;
3491 case 4: scratchNo = 11; break;
3492 default: vassert(0);
3493 }
3494 /* If we don't need to do any fixup actions in the case that
3495 the call doesn't happen, just do the simple thing and emit
3496 straight-line code. We hope this is the common case. */
3497 if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3498 || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3499 // r"scratchNo" = &target
3500 p = imm32_to_ireg( (UInt*)p,
3501 scratchNo, (UInt)i->ARMin.Call.target );
3502 // blx{cond} r"scratchNo"
3503 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3504 X0011, scratchNo);
3505 instr |= 0xFFF << 8; // stick in the SBOnes
3506 *p++ = instr;
3507 } else {
3508 Int delta;
3509 /* Complex case. We have to generate an if-then-else
3510 diamond. */
3511 // before:
3512 // b{!cond} else:
3513 // r"scratchNo" = &target
3514 // blx{AL} r"scratchNo"
3515 // preElse:
3516 // b after:
3517 // else:
3518 // mov r0, #0x55555555 // possibly
3519 // mov r1, r0 // possibly
3520 // after:
3521
3522 // before:
3523 UInt* pBefore = p;
3524
3525 // b{!cond} else: // ptmp1 points here
3526 *p++ = 0; // filled in later
3527
3528 // r"scratchNo" = &target
3529 p = imm32_to_ireg( (UInt*)p,
3530 scratchNo, (UInt)i->ARMin.Call.target );
3531
3532 // blx{AL} r"scratchNo"
3533 instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3534 X0011, scratchNo);
3535 instr |= 0xFFF << 8; // stick in the SBOnes
3536 *p++ = instr;
3537
3538 // preElse:
3539 UInt* pPreElse = p;
3540
3541 // b after:
3542 *p++ = 0; // filled in later
3543
3544 // else:
3545 delta = (UChar*)p - (UChar*)pBefore;
3546 delta = (delta >> 2) - 2;
3547 *pBefore
3548 = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3549
3550 /* Do the 'else' actions */
3551 switch (i->ARMin.Call.rloc.pri) {
3552 case RLPri_Int:
3553 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3554 break;
3555 case RLPri_2Int:
3556 vassert(0); //ATC
3557 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3558 /* mov r1, r0 */
3559 *p++ = 0xE1A01000;
3560 break;
3561 case RLPri_None: case RLPri_INVALID: default:
3562 vassert(0);
3563 }
3564
3565 // after:
3566 delta = (UChar*)p - (UChar*)pPreElse;
3567 delta = (delta >> 2) - 2;
3568 *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3569 }
3570
3571 goto done;
3572 }
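      /* Note on the guarded-call diamond above: if the condition fails,
         the call is skipped, but the 'else' arm still deposits the
         recognisable junk value 0x55555555 into the result register(s)
         (r0, plus "mov r1, r0" for a two-register result), so the call's
         destination always ends up holding a defined value.  The
         _EXACTLY2 variant pins that arm to exactly two instructions; note
         the RLPri_2Int arm is still marked ATC (untested) above. */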
3573
3574 case ARMin_Mul: {
3575 /* E0000392 mul r0, r2, r3
3576 E0810392 umull r0(LO), r1(HI), r2, r3
3577 E0C10392 smull r0(LO), r1(HI), r2, r3
3578 */
3579 switch (i->ARMin.Mul.op) {
3580 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3581 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
3582 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
3583 default: vassert(0);
3584 }
3585 goto bad;
3586 }
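      /* These are fixed encodings: the operands are assumed to already be
         in r2 and r3, with the result delivered in r0 (r0/r1 for the
         widening forms).  Presumably the instruction selector arranges
         for the values to be moved into those registers beforehand. */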
3587 case ARMin_LdrEX: {
3588 /* E1D42F9F ldrexb r2, [r4]
3589 E1F42F9F ldrexh r2, [r4]
3590 E1942F9F ldrex r2, [r4]
3591 E1B42F9F ldrexd r2, r3, [r4]
3592 */
3593 switch (i->ARMin.LdrEX.szB) {
3594 case 1: *p++ = 0xE1D42F9F; goto done;
3595 case 2: *p++ = 0xE1F42F9F; goto done;
3596 case 4: *p++ = 0xE1942F9F; goto done;
3597 case 8: *p++ = 0xE1B42F9F; goto done;
3598 default: break;
3599 }
3600 goto bad;
3601 }
3602 case ARMin_StrEX: {
3603 /* E1C40F92 strexb r0, r2, [r4]
3604 E1E40F92 strexh r0, r2, [r4]
3605 E1840F92 strex r0, r2, [r4]
3606 E1A40F92 strexd r0, r2, r3, [r4]
3607 */
3608 switch (i->ARMin.StrEX.szB) {
3609 case 1: *p++ = 0xE1C40F92; goto done;
3610 case 2: *p++ = 0xE1E40F92; goto done;
3611 case 4: *p++ = 0xE1840F92; goto done;
3612 case 8: *p++ = 0xE1A40F92; goto done;
3613 default: break;
3614 }
3615 goto bad;
3616 }
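      /* LdrEX/StrEX also use hardwired registers: the address is expected
         in r4 and the data in r2 (r2/r3 for the 8-byte forms), and strex
         writes 0 to r0 on success or 1 on failure -- which is what an
         LL/SC retry loop built around these would test before re-trying. */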
3617 case ARMin_VLdStD: {
3618 UInt dD = dregEnc(i->ARMin.VLdStD.dD);
3619 UInt rN = iregEnc(i->ARMin.VLdStD.amode->reg);
3620 Int simm11 = i->ARMin.VLdStD.amode->simm11;
3621 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3622 UInt bU = simm11 >= 0 ? 1 : 0;
3623 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
3624 UInt insn;
3625 vassert(0 == (off8 & 3));
3626 off8 >>= 2;
3627 vassert(0 == (off8 & 0xFFFFFF00));
3628 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3629 insn |= off8;
3630 *p++ = insn;
3631 goto done;
3632 }
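      /* Worked example of the offset encoding above: VLDR/VSTR take an
         unsigned word count plus an up/down (U) bit, and the effective
         address is rN +/- 4*imm8.  So simm11 = -8 gives bU = 0 and
         off8 = 8 >> 2 = 2.  Offsets must be 4-aligned and no larger than
         1020 bytes in magnitude, which the asserts above enforce. */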
3633 case ARMin_VLdStS: {
3634 UInt fD = fregEnc(i->ARMin.VLdStS.fD);
3635 UInt rN = iregEnc(i->ARMin.VLdStS.amode->reg);
3636 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3637 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3638 UInt bU = simm11 >= 0 ? 1 : 0;
3639 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3640 UInt bD = fD & 1;
3641 UInt insn;
3642 vassert(0 == (off8 & 3));
3643 off8 >>= 2;
3644 vassert(0 == (off8 & 0xFFFFFF00));
3645 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3646 insn |= off8;
3647 *p++ = insn;
3648 goto done;
3649 }
3650 case ARMin_VAluD: {
3651 UInt dN = dregEnc(i->ARMin.VAluD.argL);
3652 UInt dD = dregEnc(i->ARMin.VAluD.dst);
3653 UInt dM = dregEnc(i->ARMin.VAluD.argR);
3654 UInt pqrs = X1111; /* undefined */
3655 switch (i->ARMin.VAluD.op) {
3656 case ARMvfp_ADD: pqrs = X0110; break;
3657 case ARMvfp_SUB: pqrs = X0111; break;
3658 case ARMvfp_MUL: pqrs = X0100; break;
3659 case ARMvfp_DIV: pqrs = X1000; break;
3660 default: goto bad;
3661 }
3662 vassert(pqrs != X1111);
3663 UInt bP = (pqrs >> 3) & 1;
3664 UInt bQ = (pqrs >> 2) & 1;
3665 UInt bR = (pqrs >> 1) & 1;
3666 UInt bS = (pqrs >> 0) & 1;
3667 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3668 X1011, BITS4(0,bS,0,0), dM);
3669 *p++ = insn;
3670 goto done;
3671 }
3672 case ARMin_VAluS: {
3673 UInt dN = fregEnc(i->ARMin.VAluS.argL);
3674 UInt dD = fregEnc(i->ARMin.VAluS.dst);
3675 UInt dM = fregEnc(i->ARMin.VAluS.argR);
3676 UInt bN = dN & 1;
3677 UInt bD = dD & 1;
3678 UInt bM = dM & 1;
3679 UInt pqrs = X1111; /* undefined */
3680 switch (i->ARMin.VAluS.op) {
3681 case ARMvfp_ADD: pqrs = X0110; break;
3682 case ARMvfp_SUB: pqrs = X0111; break;
3683 case ARMvfp_MUL: pqrs = X0100; break;
3684 case ARMvfp_DIV: pqrs = X1000; break;
3685 default: goto bad;
3686 }
3687 vassert(pqrs != X1111);
3688 UInt bP = (pqrs >> 3) & 1;
3689 UInt bQ = (pqrs >> 2) & 1;
3690 UInt bR = (pqrs >> 1) & 1;
3691 UInt bS = (pqrs >> 0) & 1;
3692 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3693 (dN >> 1), (dD >> 1),
3694 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3695 *p++ = insn;
3696 goto done;
3697 }
3698 case ARMin_VUnaryD: {
3699 UInt dD = dregEnc(i->ARMin.VUnaryD.dst);
3700 UInt dM = dregEnc(i->ARMin.VUnaryD.src);
3701 UInt insn = 0;
3702 switch (i->ARMin.VUnaryD.op) {
3703 case ARMvfpu_COPY:
3704 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3705 break;
3706 case ARMvfpu_ABS:
3707 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3708 break;
3709 case ARMvfpu_NEG:
3710 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3711 break;
3712 case ARMvfpu_SQRT:
3713 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3714 break;
3715 default:
3716 goto bad;
3717 }
3718 *p++ = insn;
3719 goto done;
3720 }
3721 case ARMin_VUnaryS: {
3722 UInt fD = fregEnc(i->ARMin.VUnaryS.dst);
3723 UInt fM = fregEnc(i->ARMin.VUnaryS.src);
3724 UInt insn = 0;
3725 switch (i->ARMin.VUnaryS.op) {
3726 case ARMvfpu_COPY:
3727 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3728 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3729 (fM >> 1));
3730 break;
3731 case ARMvfpu_ABS:
3732 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3733 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3734 (fM >> 1));
3735 break;
3736 case ARMvfpu_NEG:
3737 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3738 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3739 (fM >> 1));
3740 break;
3741 case ARMvfpu_SQRT:
3742 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3743 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3744 (fM >> 1));
3745 break;
3746 default:
3747 goto bad;
3748 }
3749 *p++ = insn;
3750 goto done;
3751 }
3752 case ARMin_VCmpD: {
3753 UInt dD = dregEnc(i->ARMin.VCmpD.argL);
3754 UInt dM = dregEnc(i->ARMin.VCmpD.argR);
3755 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3756 *p++ = insn; /* FCMPD dD, dM */
3757 *p++ = 0xEEF1FA10; /* FMSTAT */
3758 goto done;
3759 }
3760 case ARMin_VCMovD: {
3761 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3762 UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3763 UInt dM = dregEnc(i->ARMin.VCMovD.src);
3764 vassert(cc < 16 && cc != ARMcc_AL);
3765 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3766 *p++ = insn;
3767 goto done;
3768 }
3769 case ARMin_VCMovS: {
3770 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3771 UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3772 UInt fM = fregEnc(i->ARMin.VCMovS.src);
3773 vassert(cc < 16 && cc != ARMcc_AL);
3774 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3775 X0000,(fD >> 1),X1010,
3776 BITS4(0,1,(fM & 1),0), (fM >> 1));
3777 *p++ = insn;
3778 goto done;
3779 }
3780 case ARMin_VCvtSD: {
3781 if (i->ARMin.VCvtSD.sToD) {
3782 UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3783 UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3784 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3785 BITS4(1,1, (fM & 1), 0),
3786 (fM >> 1));
3787 *p++ = insn;
3788 goto done;
3789 } else {
3790 UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3791 UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3792 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3793 X0111, (fD >> 1),
3794 X1011, X1100, dM);
3795 *p++ = insn;
3796 goto done;
3797 }
3798 }
3799 case ARMin_VXferQ: {
3800 UInt insn;
3801 UInt qD = qregEnc(i->ARMin.VXferQ.qD);
3802 UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
3803 UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
3804 /* This is a bit tricky. We need to make 2 D-D moves and we rely
3805 on the fact that the Q register can be treated as two D registers.
3806 We also rely on the fact that the register allocator will allocate
3807 the two D's and the Q to disjoint parts of the register file,
3808 and so we don't have to worry about the first move's destination
3809 being the same as the second move's source, etc. We do have
3810 assertions though. */
3811 /* The ARM ARM specifies that
3812 D<2n> maps to the least significant half of Q<n>
3813 D<2n+1> maps to the most significant half of Q<n>
3814 So there are no issues with endianness here.
3815 */
3816 UInt qDlo = 2 * qD + 0;
3817 UInt qDhi = 2 * qD + 1;
3818 /* Stay sane .. */
3819 vassert(qDhi != dHi && qDhi != dLo);
3820 vassert(qDlo != dHi && qDlo != dLo);
3821 /* vmov dX, dY is
3822 F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
3823 */
3824 # define VMOV_D_D(_xx,_yy) \
3825 XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
3826 ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
3827 BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
3828 ((_yy) & 0xF) )
3829 if (i->ARMin.VXferQ.toQ) {
3830 insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
3831 insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
3832 } else {
3833 insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
3834 insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
3835 }
3836 # undef VMOV_D_D
3837 goto done;
3838 }
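      /* Concrete instance of the D<2n>/D<2n+1> mapping: q9 overlays d18
         (least significant half) and d19 (most significant half), so a
         to-Q transfer into q9 comes out as "vmov d18, dLo" followed by
         "vmov d19, dHi". */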
3839 case ARMin_VXferD: {
3840 UInt dD = dregEnc(i->ARMin.VXferD.dD);
3841 UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3842 UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3843 /* vmov dD, rLo, rHi is
3844 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3845 vmov rLo, rHi, dD is
3846 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3847 */
3848 UInt insn
3849 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3850 rHi, rLo, 0xB,
3851 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3852 *p++ = insn;
3853 goto done;
3854 }
3855 case ARMin_VXferS: {
3856 UInt fD = fregEnc(i->ARMin.VXferS.fD);
3857 UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3858 /* vmov fD, rLo is
3859 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3860 vmov rLo, fD is
3861 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3862 */
3863 UInt insn
3864 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3865 (fD >> 1) & 0xF, rLo, 0xA,
3866 BITS4((fD & 1),0,0,1), 0);
3867 *p++ = insn;
3868 goto done;
3869 }
3870 case ARMin_VCvtID: {
3871 Bool iToD = i->ARMin.VCvtID.iToD;
3872 Bool syned = i->ARMin.VCvtID.syned;
3873 if (iToD && syned) {
3874 // FSITOD: I32S-in-freg to F64-in-dreg
3875 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3876 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3877 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3878 X1011, BITS4(1,1,(regF & 1),0),
3879 (regF >> 1) & 0xF);
3880 *p++ = insn;
3881 goto done;
3882 }
3883 if (iToD && (!syned)) {
3884 // FUITOD: I32U-in-freg to F64-in-dreg
3885 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3886 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3887 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3888 X1011, BITS4(0,1,(regF & 1),0),
3889 (regF >> 1) & 0xF);
3890 *p++ = insn;
3891 goto done;
3892 }
3893 if ((!iToD) && syned) {
3894 // FTOSID: F64-in-dreg to I32S-in-freg
3895 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3896 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3897 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3898 X1101, (regF >> 1) & 0xF,
3899 X1011, X0100, regD);
3900 *p++ = insn;
3901 goto done;
3902 }
3903 if ((!iToD) && (!syned)) {
3904 // FTOUID: F64-in-dreg to I32U-in-freg
3905 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3906 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3907 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3908 X1100, (regF >> 1) & 0xF,
3909 X1011, X0100, regD);
3910 *p++ = insn;
3911 goto done;
3912 }
3913 /*UNREACHED*/
3914 vassert(0);
3915 }
3916 case ARMin_VRIntR: { /* NB: ARM v8 and above only */
3917 Bool isF64 = i->ARMin.VRIntR.isF64;
3918 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.dst);
3919 UInt rSrc = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.src);
3920 /* The encoding of registers here differs strangely for the
3921 F32 and F64 cases. */
3922 UInt D, Vd, M, Vm;
3923 if (isF64) {
3924 D = (rDst >> 4) & 1;
3925 Vd = rDst & 0xF;
3926 M = (rSrc >> 4) & 1;
3927 Vm = rSrc & 0xF;
3928 } else {
3929 Vd = (rDst >> 1) & 0xF;
3930 D = rDst & 1;
3931 Vm = (rSrc >> 1) & 0xF;
3932 M = rSrc & 1;
3933 }
3934 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15);
3935 *p++ = XXXXXXXX(0xE, X1110, X1011 | (D << 2), X0110, Vd,
3936 isF64 ? X1011 : X1010, X0100 | (M << 1), Vm);
3937 goto done;
3938 }
3939 case ARMin_VMinMaxNum: {
3940 Bool isF64 = i->ARMin.VMinMaxNum.isF64;
3941 Bool isMax = i->ARMin.VMinMaxNum.isMax;
3942 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.dst);
3943 UInt rSrcL = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcL);
3944 UInt rSrcR = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcR);
3945 /* The encoding of registers here differs strangely for the
3946 F32 and F64 cases. */
3947 UInt D, Vd, N, Vn, M, Vm;
3948 if (isF64) {
3949 D = (rDst >> 4) & 1;
3950 Vd = rDst & 0xF;
3951 N = (rSrcL >> 4) & 1;
3952 Vn = rSrcL & 0xF;
3953 M = (rSrcR >> 4) & 1;
3954 Vm = rSrcR & 0xF;
3955 } else {
3956 Vd = (rDst >> 1) & 0xF;
3957 D = rDst & 1;
3958 Vn = (rSrcL >> 1) & 0xF;
3959 N = rSrcL & 1;
3960 Vm = (rSrcR >> 1) & 0xF;
3961 M = rSrcR & 1;
3962 }
3963 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15 && N <= 1
3964 && Vn <= 15);
3965 *p++ = XXXXXXXX(X1111,X1110, X1000 | (D << 2), Vn, Vd,
3966 X1010 | (isF64 ? 1 : 0),
3967 (N << 3) | ((isMax ? 0 : 1) << 2) | (M << 1) | 0,
3968 Vm);
3969 goto done;
3970 }
3971 case ARMin_FPSCR: {
3972 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3973 UInt iReg = iregEnc(i->ARMin.FPSCR.iReg);
3974 if (toFPSCR) {
3975 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3976 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3977 goto done;
3978 }
3979 goto bad; // FPSCR -> iReg case currently ATC
3980 }
3981 case ARMin_MFence: {
3982          // It's not clear (to me) how the older CP15 (mcr) barriers
3983          // below relate to the ARMv7 versions, so let's just use the
3984          // v7 versions as they are at least well documented.
3985 //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3986 //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3987 //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3988 *p++ = 0xF57FF04F; /* DSB sy */
3989 *p++ = 0xF57FF05F; /* DMB sy */
3990 *p++ = 0xF57FF06F; /* ISB */
3991 goto done;
3992 }
3993 case ARMin_CLREX: {
3994 *p++ = 0xF57FF01F; /* clrex */
3995 goto done;
3996 }
3997
3998 case ARMin_NLdStQ: {
3999 UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
4000 UInt regN, regM;
4001 UInt D = regD >> 4;
4002 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
4003 UInt insn;
4004 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
4005 regD &= 0xF;
4006 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
4007 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
4008 regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
4009 } else {
4010 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
4011 regM = 15;
4012 }
4013 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4014 regN, regD, X1010, X1000, regM);
4015 *p++ = insn;
4016 goto done;
4017 }
4018 case ARMin_NLdStD: {
4019 UInt regD = dregEnc(i->ARMin.NLdStD.dD);
4020 UInt regN, regM;
4021 UInt D = regD >> 4;
4022 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
4023 UInt insn;
4024 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
4025 regD &= 0xF;
4026 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
4027 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
4028 regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
4029 } else {
4030 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
4031 regM = 15;
4032 }
4033 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4034 regN, regD, X0111, X1000, regM);
4035 *p++ = insn;
4036 goto done;
4037 }
4038 case ARMin_NUnaryS: {
4039 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
4040 UInt regD, D;
4041 UInt regM, M;
4042 UInt size = i->ARMin.NUnaryS.size;
4043 UInt insn;
4044 UInt opc, opc1, opc2;
4045 switch (i->ARMin.NUnaryS.op) {
4046 case ARMneon_VDUP:
4047 if (i->ARMin.NUnaryS.size >= 16)
4048 goto bad;
4049 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
4050 goto bad;
4051 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4052 goto bad;
4053 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
4054 ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
4055 : dregEnc(i->ARMin.NUnaryS.dst->reg);
4056 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
4057 ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
4058 : dregEnc(i->ARMin.NUnaryS.src->reg);
4059 D = regD >> 4;
4060 M = regM >> 4;
4061 regD &= 0xf;
4062 regM &= 0xf;
4063 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
4064 (i->ARMin.NUnaryS.size & 0xf), regD,
4065 X1100, BITS4(0,Q,M,0), regM);
4066 *p++ = insn;
4067 goto done;
4068 case ARMneon_SETELEM:
4069 regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
4070 dregEnc(i->ARMin.NUnaryS.dst->reg);
4071 regM = iregEnc(i->ARMin.NUnaryS.src->reg);
4072 M = regM >> 4;
4073 D = regD >> 4;
4074 regM &= 0xF;
4075 regD &= 0xF;
4076 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
4077 goto bad;
4078 switch (size) {
4079 case 0:
4080 if (i->ARMin.NUnaryS.dst->index > 7)
4081 goto bad;
4082 opc = X1000 | i->ARMin.NUnaryS.dst->index;
4083 break;
4084 case 1:
4085 if (i->ARMin.NUnaryS.dst->index > 3)
4086 goto bad;
4087 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
4088 break;
4089 case 2:
4090 if (i->ARMin.NUnaryS.dst->index > 1)
4091 goto bad;
4092 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
4093 break;
4094 default:
4095 goto bad;
4096 }
4097 opc1 = (opc >> 2) & 3;
4098 opc2 = opc & 3;
4099 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
4100 regD, regM, X1011,
4101 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
4102 *p++ = insn;
4103 goto done;
4104 case ARMneon_GETELEMU:
4105 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4106 dregEnc(i->ARMin.NUnaryS.src->reg);
4107 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4108 M = regM >> 4;
4109 D = regD >> 4;
4110 regM &= 0xF;
4111 regD &= 0xF;
4112 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4113 goto bad;
4114 switch (size) {
4115 case 0:
4116 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4117 regM++;
4118 i->ARMin.NUnaryS.src->index -= 8;
4119 }
4120 if (i->ARMin.NUnaryS.src->index > 7)
4121 goto bad;
4122 opc = X1000 | i->ARMin.NUnaryS.src->index;
4123 break;
4124 case 1:
4125 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4126 regM++;
4127 i->ARMin.NUnaryS.src->index -= 4;
4128 }
4129 if (i->ARMin.NUnaryS.src->index > 3)
4130 goto bad;
4131 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4132 break;
4133 case 2:
4134 goto bad;
4135 default:
4136 goto bad;
4137 }
4138 opc1 = (opc >> 2) & 3;
4139 opc2 = opc & 3;
4140 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
4141 regM, regD, X1011,
4142 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4143 *p++ = insn;
4144 goto done;
4145 case ARMneon_GETELEMS:
4146 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4147 dregEnc(i->ARMin.NUnaryS.src->reg);
4148 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4149 M = regM >> 4;
4150 D = regD >> 4;
4151 regM &= 0xF;
4152 regD &= 0xF;
4153 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4154 goto bad;
4155 switch (size) {
4156 case 0:
4157 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4158 regM++;
4159 i->ARMin.NUnaryS.src->index -= 8;
4160 }
4161 if (i->ARMin.NUnaryS.src->index > 7)
4162 goto bad;
4163 opc = X1000 | i->ARMin.NUnaryS.src->index;
4164 break;
4165 case 1:
4166 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4167 regM++;
4168 i->ARMin.NUnaryS.src->index -= 4;
4169 }
4170 if (i->ARMin.NUnaryS.src->index > 3)
4171 goto bad;
4172 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4173 break;
4174 case 2:
4175 if (Q && i->ARMin.NUnaryS.src->index > 1) {
4176 regM++;
4177 i->ARMin.NUnaryS.src->index -= 2;
4178 }
4179 if (i->ARMin.NUnaryS.src->index > 1)
4180 goto bad;
4181 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
4182 break;
4183 default:
4184 goto bad;
4185 }
4186 opc1 = (opc >> 2) & 3;
4187 opc2 = opc & 3;
4188 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
4189 regM, regD, X1011,
4190 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4191 *p++ = insn;
4192 goto done;
4193 default:
4194 goto bad;
4195 }
4196 }
4197 case ARMin_NUnary: {
4198 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
4199 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
4200 ? (qregEnc(i->ARMin.NUnary.dst) << 1)
4201 : dregEnc(i->ARMin.NUnary.dst);
4202 UInt regM, M;
4203 UInt D = regD >> 4;
4204 UInt sz1 = i->ARMin.NUnary.size >> 1;
4205 UInt sz2 = i->ARMin.NUnary.size & 1;
4206 UInt sz = i->ARMin.NUnary.size;
4207 UInt insn;
4208 UInt F = 0; /* TODO: floating point EQZ ??? */
4209 if (i->ARMin.NUnary.op != ARMneon_DUP) {
4210 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4211 ? (qregEnc(i->ARMin.NUnary.src) << 1)
4212 : dregEnc(i->ARMin.NUnary.src);
4213 M = regM >> 4;
4214 } else {
4215 regM = iregEnc(i->ARMin.NUnary.src);
4216 M = regM >> 4;
4217 }
4218 regD &= 0xF;
4219 regM &= 0xF;
4220 switch (i->ARMin.NUnary.op) {
4221 case ARMneon_COPY: /* VMOV reg, reg */
4222 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4223 BITS4(M,Q,M,1), regM);
4224 break;
4225 case ARMneon_COPYN: /* VMOVN regD, regQ */
4226 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4227 regD, X0010, BITS4(0,0,M,0), regM);
4228 break;
4229 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4230 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4231 regD, X0010, BITS4(1,0,M,0), regM);
4232 break;
4233 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4234 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4235 regD, X0010, BITS4(0,1,M,0), regM);
4236 break;
4237 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4238 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4239 regD, X0010, BITS4(1,1,M,0), regM);
4240 break;
4241 case ARMneon_COPYLS: /* VMOVL regQ, regD */
4242 if (sz >= 3)
4243 goto bad;
4244 insn = XXXXXXXX(0xF, X0010,
4245 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4246 BITS4((sz == 0) ? 1 : 0,0,0,0),
4247 regD, X1010, BITS4(0,0,M,1), regM);
4248 break;
4249 case ARMneon_COPYLU: /* VMOVL regQ, regD */
4250 if (sz >= 3)
4251 goto bad;
4252 insn = XXXXXXXX(0xF, X0011,
4253 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4254 BITS4((sz == 0) ? 1 : 0,0,0,0),
4255 regD, X1010, BITS4(0,0,M,1), regM);
4256 break;
4257 case ARMneon_NOT: /* VMVN reg, reg*/
4258 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4259 BITS4(1,Q,M,0), regM);
4260 break;
4261 case ARMneon_EQZ:
4262 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4263 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4264 break;
4265 case ARMneon_CNT:
4266 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4267 BITS4(0,Q,M,0), regM);
4268 break;
4269 case ARMneon_CLZ:
4270 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4271 regD, X0100, BITS4(1,Q,M,0), regM);
4272 break;
4273 case ARMneon_CLS:
4274 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4275 regD, X0100, BITS4(0,Q,M,0), regM);
4276 break;
4277 case ARMneon_ABS:
4278 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4279 regD, X0011, BITS4(0,Q,M,0), regM);
4280 break;
4281 case ARMneon_DUP:
4282 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4283 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4284 vassert(sz1 + sz2 < 2);
4285 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4286 X1011, BITS4(D,0,sz2,1), X0000);
4287 break;
4288 case ARMneon_REV16:
4289 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4290 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4291 break;
4292 case ARMneon_REV32:
4293 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4294 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4295 break;
4296 case ARMneon_REV64:
4297 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4298 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4299 break;
4300 case ARMneon_PADDLU:
4301 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4302 regD, X0010, BITS4(1,Q,M,0), regM);
4303 break;
4304 case ARMneon_PADDLS:
4305 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4306 regD, X0010, BITS4(0,Q,M,0), regM);
4307 break;
4308 case ARMneon_VQSHLNUU:
4309 insn = XXXXXXXX(0xF, X0011,
4310 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4311 sz & 0xf, regD, X0111,
4312 BITS4(sz >> 6,Q,M,1), regM);
4313 break;
4314 case ARMneon_VQSHLNSS:
4315 insn = XXXXXXXX(0xF, X0010,
4316 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4317 sz & 0xf, regD, X0111,
4318 BITS4(sz >> 6,Q,M,1), regM);
4319 break;
4320 case ARMneon_VQSHLNUS:
4321 insn = XXXXXXXX(0xF, X0011,
4322 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4323 sz & 0xf, regD, X0110,
4324 BITS4(sz >> 6,Q,M,1), regM);
4325 break;
4326 case ARMneon_VCVTFtoS:
4327 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4328 BITS4(0,Q,M,0), regM);
4329 break;
4330 case ARMneon_VCVTFtoU:
4331 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4332 BITS4(1,Q,M,0), regM);
4333 break;
4334 case ARMneon_VCVTStoF:
4335 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4336 BITS4(0,Q,M,0), regM);
4337 break;
4338 case ARMneon_VCVTUtoF:
4339 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4340 BITS4(1,Q,M,0), regM);
4341 break;
4342 case ARMneon_VCVTFtoFixedU:
4343 sz1 = (sz >> 5) & 1;
4344 sz2 = (sz >> 4) & 1;
4345 sz &= 0xf;
4346 insn = XXXXXXXX(0xF, X0011,
4347 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4348 BITS4(0,Q,M,1), regM);
4349 break;
4350 case ARMneon_VCVTFtoFixedS:
4351 sz1 = (sz >> 5) & 1;
4352 sz2 = (sz >> 4) & 1;
4353 sz &= 0xf;
4354 insn = XXXXXXXX(0xF, X0010,
4355 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4356 BITS4(0,Q,M,1), regM);
4357 break;
4358 case ARMneon_VCVTFixedUtoF:
4359 sz1 = (sz >> 5) & 1;
4360 sz2 = (sz >> 4) & 1;
4361 sz &= 0xf;
4362 insn = XXXXXXXX(0xF, X0011,
4363 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4364 BITS4(0,Q,M,1), regM);
4365 break;
4366 case ARMneon_VCVTFixedStoF:
4367 sz1 = (sz >> 5) & 1;
4368 sz2 = (sz >> 4) & 1;
4369 sz &= 0xf;
4370 insn = XXXXXXXX(0xF, X0010,
4371 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4372 BITS4(0,Q,M,1), regM);
4373 break;
4374 case ARMneon_VCVTF32toF16:
4375 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4376 BITS4(0,0,M,0), regM);
4377 break;
4378 case ARMneon_VCVTF16toF32:
4379 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4380 BITS4(0,0,M,0), regM);
4381 break;
4382 case ARMneon_VRECIP:
4383 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4384 BITS4(0,Q,M,0), regM);
4385 break;
4386 case ARMneon_VRECIPF:
4387 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4388 BITS4(0,Q,M,0), regM);
4389 break;
4390 case ARMneon_VABSFP:
4391 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4392 BITS4(0,Q,M,0), regM);
4393 break;
4394 case ARMneon_VRSQRTEFP:
4395 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4396 BITS4(1,Q,M,0), regM);
4397 break;
4398 case ARMneon_VRSQRTE:
4399 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4400 BITS4(1,Q,M,0), regM);
4401 break;
4402 case ARMneon_VNEGF:
4403 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4404 BITS4(1,Q,M,0), regM);
4405 break;
4406
4407 default:
4408 goto bad;
4409 }
4410 *p++ = insn;
4411 goto done;
4412 }
4413 case ARMin_NDual: {
4414 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4415 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4416 ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4417 : dregEnc(i->ARMin.NDual.arg1);
4418 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4419 ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4420 : dregEnc(i->ARMin.NDual.arg2);
4421 UInt D = regD >> 4;
4422 UInt M = regM >> 4;
4423 UInt sz1 = i->ARMin.NDual.size >> 1;
4424 UInt sz2 = i->ARMin.NDual.size & 1;
4425 UInt insn;
4426 regD &= 0xF;
4427 regM &= 0xF;
4428 switch (i->ARMin.NDual.op) {
4429 case ARMneon_TRN: /* VTRN reg, reg */
4430 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4431 regD, X0000, BITS4(1,Q,M,0), regM);
4432 break;
4433 case ARMneon_ZIP: /* VZIP reg, reg */
4434 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4435 regD, X0001, BITS4(1,Q,M,0), regM);
4436 break;
4437 case ARMneon_UZP: /* VUZP reg, reg */
4438 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4439 regD, X0001, BITS4(0,Q,M,0), regM);
4440 break;
4441 default:
4442 goto bad;
4443 }
4444 *p++ = insn;
4445 goto done;
4446 }
4447 case ARMin_NBinary: {
4448 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4449 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4450 ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4451 : dregEnc(i->ARMin.NBinary.dst);
4452 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4453 ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4454 : dregEnc(i->ARMin.NBinary.argL);
4455 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4456 ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4457 : dregEnc(i->ARMin.NBinary.argR);
4458 UInt sz1 = i->ARMin.NBinary.size >> 1;
4459 UInt sz2 = i->ARMin.NBinary.size & 1;
4460 UInt D = regD >> 4;
4461 UInt N = regN >> 4;
4462 UInt M = regM >> 4;
4463 UInt insn;
4464 regD &= 0xF;
4465 regM &= 0xF;
4466 regN &= 0xF;
4467 switch (i->ARMin.NBinary.op) {
4468 case ARMneon_VAND: /* VAND reg, reg, reg */
4469 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4470 BITS4(N,Q,M,1), regM);
4471 break;
4472 case ARMneon_VORR: /* VORR reg, reg, reg*/
4473 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4474 BITS4(N,Q,M,1), regM);
4475 break;
4476 case ARMneon_VXOR: /* VEOR reg, reg, reg */
4477 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4478 BITS4(N,Q,M,1), regM);
4479 break;
4480 case ARMneon_VADD: /* VADD reg, reg, reg */
4481 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4482 X1000, BITS4(N,Q,M,0), regM);
4483 break;
4484 case ARMneon_VSUB: /* VSUB reg, reg, reg */
4485 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4486 X1000, BITS4(N,Q,M,0), regM);
4487 break;
4488 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4489 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4490 X0110, BITS4(N,Q,M,1), regM);
4491 break;
4492 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4493 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4494 X0110, BITS4(N,Q,M,1), regM);
4495 break;
4496 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4497 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4498 X0110, BITS4(N,Q,M,0), regM);
4499 break;
4500 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4501 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4502 X0110, BITS4(N,Q,M,0), regM);
4503 break;
4504 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4505 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4506 X0001, BITS4(N,Q,M,0), regM);
4507 break;
4508 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4509 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4510 X0001, BITS4(N,Q,M,0), regM);
4511 break;
4512 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4513 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4514 X0000, BITS4(N,Q,M,1), regM);
4515 break;
4516 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4517 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4518 X0000, BITS4(N,Q,M,1), regM);
4519 break;
4520 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4521 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4522 X0010, BITS4(N,Q,M,1), regM);
4523 break;
4524 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4525 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4526 X0010, BITS4(N,Q,M,1), regM);
4527 break;
4528 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4529 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4530 X0011, BITS4(N,Q,M,0), regM);
4531 break;
4532 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4533 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4534 X0011, BITS4(N,Q,M,0), regM);
4535 break;
4536 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4537 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4538 X0011, BITS4(N,Q,M,1), regM);
4539 break;
4540 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4541 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4542 X0011, BITS4(N,Q,M,1), regM);
4543 break;
4544 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4545 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4546 X1000, BITS4(N,Q,M,1), regM);
4547 break;
4548 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4549 if (i->ARMin.NBinary.size >= 16)
4550 goto bad;
4551 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4552 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4553 regM);
4554 break;
4555 case ARMneon_VMUL:
4556 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4557 X1001, BITS4(N,Q,M,1), regM);
4558 break;
4559 case ARMneon_VMULLU:
4560 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4561 X1100, BITS4(N,0,M,0), regM);
4562 break;
4563 case ARMneon_VMULLS:
4564 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4565 X1100, BITS4(N,0,M,0), regM);
4566 break;
4567 case ARMneon_VMULP:
4568 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4569 X1001, BITS4(N,Q,M,1), regM);
4570 break;
4571 case ARMneon_VMULFP:
4572 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4573 X1101, BITS4(N,Q,M,1), regM);
4574 break;
4575 case ARMneon_VMULLP:
4576 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4577 X1110, BITS4(N,0,M,0), regM);
4578 break;
4579 case ARMneon_VQDMULH:
4580 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4581 X1011, BITS4(N,Q,M,0), regM);
4582 break;
4583 case ARMneon_VQRDMULH:
4584 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4585 X1011, BITS4(N,Q,M,0), regM);
4586 break;
4587 case ARMneon_VQDMULL:
4588 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4589 X1101, BITS4(N,0,M,0), regM);
4590 break;
4591 case ARMneon_VTBL:
4592 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4593 X1000, BITS4(N,0,M,0), regM);
4594 break;
4595 case ARMneon_VPADD:
4596 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4597 X1011, BITS4(N,Q,M,1), regM);
4598 break;
4599 case ARMneon_VPADDFP:
4600 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4601 X1101, BITS4(N,Q,M,0), regM);
4602 break;
4603 case ARMneon_VPMINU:
4604 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4605 X1010, BITS4(N,Q,M,1), regM);
4606 break;
4607 case ARMneon_VPMINS:
4608 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4609 X1010, BITS4(N,Q,M,1), regM);
4610 break;
4611 case ARMneon_VPMAXU:
4612 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4613 X1010, BITS4(N,Q,M,0), regM);
4614 break;
4615 case ARMneon_VPMAXS:
4616 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4617 X1010, BITS4(N,Q,M,0), regM);
4618 break;
4619 case ARMneon_VADDFP: /* VADD reg, reg, reg */
4620 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4621 X1101, BITS4(N,Q,M,0), regM);
4622 break;
4623             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4624 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4625 X1101, BITS4(N,Q,M,0), regM);
4626 break;
4627 case ARMneon_VABDFP: /* VABD reg, reg, reg */
4628 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4629 X1101, BITS4(N,Q,M,0), regM);
4630 break;
4631 case ARMneon_VMINF:
4632 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4633 X1111, BITS4(N,Q,M,0), regM);
4634 break;
4635 case ARMneon_VMAXF:
4636 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4637 X1111, BITS4(N,Q,M,0), regM);
4638 break;
4639 case ARMneon_VPMINF:
4640 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4641 X1111, BITS4(N,Q,M,0), regM);
4642 break;
4643 case ARMneon_VPMAXF:
4644 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4645 X1111, BITS4(N,Q,M,0), regM);
4646 break;
4647 case ARMneon_VRECPS:
4648 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4649 BITS4(N,Q,M,1), regM);
4650 break;
4651 case ARMneon_VCGTF:
4652 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4653 BITS4(N,Q,M,0), regM);
4654 break;
4655 case ARMneon_VCGEF:
4656 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4657 BITS4(N,Q,M,0), regM);
4658 break;
4659 case ARMneon_VCEQF:
4660 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4661 BITS4(N,Q,M,0), regM);
4662 break;
4663 case ARMneon_VRSQRTS:
4664 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4665 BITS4(N,Q,M,1), regM);
4666 break;
4667 default:
4668 goto bad;
4669 }
4670 *p++ = insn;
4671 goto done;
4672 }
4673 case ARMin_NShift: {
4674 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4675 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4676 ? (qregEnc(i->ARMin.NShift.dst) << 1)
4677 : dregEnc(i->ARMin.NShift.dst);
4678 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4679 ? (qregEnc(i->ARMin.NShift.argL) << 1)
4680 : dregEnc(i->ARMin.NShift.argL);
4681 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4682 ? (qregEnc(i->ARMin.NShift.argR) << 1)
4683 : dregEnc(i->ARMin.NShift.argR);
4684 UInt sz1 = i->ARMin.NShift.size >> 1;
4685 UInt sz2 = i->ARMin.NShift.size & 1;
4686 UInt D = regD >> 4;
4687 UInt N = regN >> 4;
4688 UInt M = regM >> 4;
4689 UInt insn;
4690 regD &= 0xF;
4691 regM &= 0xF;
4692 regN &= 0xF;
4693 switch (i->ARMin.NShift.op) {
4694 case ARMneon_VSHL:
4695 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4696 X0100, BITS4(N,Q,M,0), regM);
4697 break;
4698 case ARMneon_VSAL:
4699 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4700 X0100, BITS4(N,Q,M,0), regM);
4701 break;
4702 case ARMneon_VQSHL:
4703 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4704 X0100, BITS4(N,Q,M,1), regM);
4705 break;
4706 case ARMneon_VQSAL:
4707 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4708 X0100, BITS4(N,Q,M,1), regM);
4709 break;
4710 default:
4711 goto bad;
4712 }
4713 *p++ = insn;
4714 goto done;
4715 }
4716 case ARMin_NShl64: {
4717 HReg regDreg = i->ARMin.NShl64.dst;
4718 HReg regMreg = i->ARMin.NShl64.src;
4719 UInt amt = i->ARMin.NShl64.amt;
4720 vassert(amt >= 1 && amt <= 63);
4721 vassert(hregClass(regDreg) == HRcFlt64);
4722 vassert(hregClass(regMreg) == HRcFlt64);
4723 UInt regD = dregEnc(regDreg);
4724 UInt regM = dregEnc(regMreg);
4725 UInt D = (regD >> 4) & 1;
4726 UInt Vd = regD & 0xF;
4727 UInt L = 1;
4728 UInt Q = 0; /* always 64-bit */
4729 UInt M = (regM >> 4) & 1;
4730 UInt Vm = regM & 0xF;
4731 UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4732 amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4733 *p++ = insn;
4734 goto done;
4735 }
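      /* This is VSHL (immediate) on a 64-bit element: with the L bit set,
         the 6-bit immediate is the shift amount itself (for narrower
         element sizes it would be biased by the element size), which is
         why 'amt' goes unchanged into the two nibbles above. */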
4736 case ARMin_NeonImm: {
4737 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4738 UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4739 dregEnc(i->ARMin.NeonImm.dst);
4740 UInt D = regD >> 4;
4741 UInt imm = i->ARMin.NeonImm.imm->imm8;
4742 UInt tp = i->ARMin.NeonImm.imm->type;
4743 UInt j = imm >> 7;
4744 UInt imm3 = (imm >> 4) & 0x7;
4745 UInt imm4 = imm & 0xF;
4746 UInt cmode, op;
4747 UInt insn;
4748 regD &= 0xF;
4749 if (tp == 9)
4750 op = 1;
4751 else
4752 op = 0;
4753 switch (tp) {
4754 case 0:
4755 case 1:
4756 case 2:
4757 case 3:
4758 case 4:
4759 case 5:
4760 cmode = tp << 1;
4761 break;
4762 case 9:
4763 case 6:
4764 cmode = 14;
4765 break;
4766 case 7:
4767 cmode = 12;
4768 break;
4769 case 8:
4770 cmode = 13;
4771 break;
4772 case 10:
4773 cmode = 15;
4774 break;
4775 default:
4776 vpanic("ARMin_NeonImm");
4777
4778 }
4779 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4780 cmode, BITS4(0,Q,op,1), imm4);
4781 *p++ = insn;
4782 goto done;
4783 }
4784 case ARMin_NCMovQ: {
4785 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4786 UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4787 UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4788 UInt vM = qM & 0xF;
4789 UInt vD = qD & 0xF;
4790 UInt M = (qM >> 4) & 1;
4791 UInt D = (qD >> 4) & 1;
4792 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4793 /* b!cc here+8: !cc A00 0000 */
4794 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4795 *p++ = insn;
4796 /* vmov qD, qM */
4797 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4798 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4799 *p++ = insn;
4800 goto done;
4801 }
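      /* NEON has no conditionally-executed vmov, so the conditional move
         is synthesised as a branch-over: the first word is a B with the
         inverted condition and a zero imm24, which targets its own
         address + 8, i.e. the word just after the unconditional vmov.
         Hence the move only takes effect when the original condition
         holds. */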
4802 case ARMin_Add32: {
4803 UInt regD = iregEnc(i->ARMin.Add32.rD);
4804 UInt regN = iregEnc(i->ARMin.Add32.rN);
4805 UInt imm32 = i->ARMin.Add32.imm32;
4806 vassert(regD != regN);
4807 /* MOV regD, imm32 */
4808 p = imm32_to_ireg((UInt *)p, regD, imm32);
4809 /* ADD regD, regN, regD */
4810 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4811 *p++ = insn;
4812 goto done;
4813 }
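      /* The constant is materialised separately (movw/movt, or whatever
         imm32_to_ireg chooses) rather than folded into the ADD, since an
         arbitrary 32-bit value generally cannot be expressed as an ARM
         modified-immediate (an 8-bit value rotated by an even amount). */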
4814
4815 case ARMin_EvCheck: {
4816 /* We generate:
4817 ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4818 subs r12, r12, #1 (A1)
4819 str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4820 bpl nofail
4821 ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
4822 bx r12
4823 nofail:
4824 */
4825 UInt* p0 = p;
4826 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4827 i->ARMin.EvCheck.amCounter);
4828 *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4829 p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4830 i->ARMin.EvCheck.amCounter);
4831 *p++ = 0x5A000001; /* bpl nofail */
4832 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4833 i->ARMin.EvCheck.amFailAddr);
4834 *p++ = 0xE12FFF1C; /* bx r12 */
4835 /* nofail: */
4836
4837 /* Crosscheck */
4838 vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4839 goto done;
4840 }
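      /* With the small displacements used for amCounter and amFailAddr,
         each do_load_or_store32 call above emits a single word, so the
         whole check is 6 instructions = 24 bytes -- the figure that
         evCheckSzB_ARM() returns and that the vassert just above
         crosschecks. */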
4841
4842 case ARMin_ProfInc: {
4843 /* We generate:
4844 (ctrP is unknown now, so use 0x65556555 in the
4845 expectation that a later call to LibVEX_patchProfCtr
4846 will be used to fill in the immediate fields once the
4847 right value is known.)
4848 movw r12, lo16(0x65556555)
4849             movt r12, hi16(0x65556555)
4850 ldr r11, [r12]
4851 adds r11, r11, #1
4852 str r11, [r12]
4853 ldr r11, [r12+4]
4854 adc r11, r11, #0
4855 str r11, [r12+4]
4856 */
4857 p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4858 *p++ = 0xE59CB000;
4859 *p++ = 0xE29BB001;
4860 *p++ = 0xE58CB000;
4861 *p++ = 0xE59CB004;
4862 *p++ = 0xE2ABB000;
4863 *p++ = 0xE58CB004;
4864 /* Tell the caller .. */
4865 vassert(!(*is_profInc));
4866 *is_profInc = True;
4867 goto done;
4868 }
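      /* The counter is a 64-bit value laid out low word first: 'adds'
         increments the word at [r12] and sets the carry flag, and the
         following 'adc ..., #0' folds that carry into the word at
         [r12+4].  The 0x65556555 placeholder is exactly what
         patchProfInc_ARM later looks for before patching in the real
         counter address. */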
4869
4870 /* ... */
4871 default:
4872 goto bad;
4873 }
4874
4875 bad:
4876 ppARMInstr(i);
4877 vpanic("emit_ARMInstr");
4878 /*NOTREACHED*/
4879
4880 done:
4881 vassert(((UChar*)p) - &buf[0] <= 32);
4882 return ((UChar*)p) - &buf[0];
4883 }
4884
4885
4886 /* How big is an event check? See case for ARMin_EvCheck in
4887 emit_ARMInstr just above. That crosschecks what this returns, so
4888 we can tell if we're inconsistent. */
4889 Int evCheckSzB_ARM (void)
4890 {
4891 return 24;
4892 }
4893
4894
4895 /* NB: what goes on here has to be very closely coordinated with the
4896 emitInstr case for XDirect, above. */
4897 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4898 void* place_to_chain,
4899 const void* disp_cp_chain_me_EXPECTED,
4900 const void* place_to_jump_to )
4901 {
4902 vassert(endness_host == VexEndnessLE);
4903
4904 /* What we're expecting to see is:
4905         movw r12, lo16(disp_cp_chain_me_EXPECTED)
4906         movt r12, hi16(disp_cp_chain_me_EXPECTED)
4907 blx r12
4908 viz
4909 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4910 E1 2F FF 3C
4911 */
4912 UInt* p = (UInt*)place_to_chain;
4913 vassert(0 == (3 & (HWord)p));
4914 vassert(is_imm32_to_ireg_EXACTLY2(
4915 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4916 vassert(p[2] == 0xE12FFF3C);
4917 /* And what we want to change it to is either:
4918 (general case)
4919 movw r12, lo16(place_to_jump_to)
4920 movt r12, hi16(place_to_jump_to)
4921 bx r12
4922 viz
4923 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4924 E1 2F FF 1C
4925 ---OR---
4926 in the case where the displacement falls within 26 bits
4927 b disp24; undef; undef
4928 viz
4929 EA <3 bytes == disp24>
4930 FF 00 00 00
4931 FF 00 00 00
4932
4933 In both cases the replacement has the same length as the original.
4934 To remain sane & verifiable,
4935 (1) limit the displacement for the short form to
4936 (say) +/- 30 million, so as to avoid wraparound
4937 off-by-ones
4938 (2) even if the short form is applicable, once every (say)
4939 1024 times use the long form anyway, so as to maintain
4940 verifiability
4941 */
4942
4943 /* This is the delta we need to put into a B insn. It's relative
4944 to the start of the next-but-one insn, hence the -8. */
4945 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4946 Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4947 vassert(0 == (delta & (Long)3));
4948
4949 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4950 if (shortOK) {
4951 shortCTR++; // thread safety bleh
4952 if (0 == (shortCTR & 0x3FF)) {
4953 shortOK = False;
4954 if (0)
4955 vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4956 "using long form\n", shortCTR);
4957 }
4958 }
4959
4960 /* And make the modifications. */
4961 if (shortOK) {
4962 UInt uimm24 = (UInt)(delta >> 2);
4963 UInt uimm24_shl8 = uimm24 << 8;
4964 Int simm24 = (Int)uimm24_shl8;
4965 simm24 >>= 8;
4966 vassert(uimm24 == simm24);
4967 p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4968 p[1] = 0xFF000000;
4969 p[2] = 0xFF000000;
4970 } else {
4971 (void)imm32_to_ireg_EXACTLY2(
4972 p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4973 p[2] = 0xE12FFF1C;
4974 }
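   /* In the short form, the two 0xFF000000 filler words keep the patched
      sequence the same 12-byte length as the long form; they are the
      "undef" words mentioned above and are never meant to be executed.
      The uimm24 shift-up/shift-down just above is a sign-extension round
      trip: it checks that delta/4 really does fit in a signed 24-bit
      field before it is committed to the B encoding. */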
4975
4976 VexInvalRange vir = {(HWord)p, 12};
4977 return vir;
4978 }
4979
4980
4981 /* NB: what goes on here has to be very closely coordinated with the
4982 emitInstr case for XDirect, above. */
4983 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4984 void* place_to_unchain,
4985 const void* place_to_jump_to_EXPECTED,
4986 const void* disp_cp_chain_me )
4987 {
4988 vassert(endness_host == VexEndnessLE);
4989
4990 /* What we're expecting to see is:
4991 (general case)
4992 movw r12, lo16(place_to_jump_to_EXPECTED)
4993         movt r12, hi16(place_to_jump_to_EXPECTED)
4994 bx r12
4995 viz
4996 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4997 E1 2F FF 1C
4998 ---OR---
4999 in the case where the displacement falls within 26 bits
5000 b disp24; undef; undef
5001 viz
5002 EA <3 bytes == disp24>
5003 FF 00 00 00
5004 FF 00 00 00
5005 */
5006 UInt* p = (UInt*)place_to_unchain;
5007 vassert(0 == (3 & (HWord)p));
5008
5009 Bool valid = False;
5010 if (is_imm32_to_ireg_EXACTLY2(
5011 p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
5012 && p[2] == 0xE12FFF1C) {
5013 valid = True; /* it's the long form */
5014 if (0)
5015 vex_printf("QQQ unchainXDirect_ARM: found long form\n");
5016 } else
5017 if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
5018 /* It's the short form. Check the displacement is right. */
5019 Int simm24 = p[0] & 0x00FFFFFF;
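      /* Recover the signed 24-bit displacement: shifting the field up to
         the top of the word and arithmetically shifting back replicates
         bit 23 across bits 31..24. */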
5020 simm24 <<= 8; simm24 >>= 8;
5021 if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
5022 valid = True;
5023 if (0)
5024 vex_printf("QQQ unchainXDirect_ARM: found short form\n");
5025 }
5026 }
5027 vassert(valid);
5028
5029 /* And what we want to change it to is:
5030 movw r12, lo16(disp_cp_chain_me)
5031 movt r12, hi16(disp_cp_chain_me)
5032 blx r12
5033 viz
5034 <8 bytes generated by imm32_to_ireg_EXACTLY2>
5035 E1 2F FF 3C
5036 */
5037 (void)imm32_to_ireg_EXACTLY2(
5038 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
5039 p[2] = 0xE12FFF3C;
5040 VexInvalRange vir = {(HWord)p, 12};
5041 return vir;
5042 }
5043
5044
5045 /* Patch the counter address into a profile inc point, as previously
5046 created by the ARMin_ProfInc case for emit_ARMInstr. */
5047 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
5048 void* place_to_patch,
5049 const ULong* location_of_counter )
5050 {
5051 vassert(endness_host == VexEndnessLE);
5052 vassert(sizeof(ULong*) == 4);
5053 UInt* p = (UInt*)place_to_patch;
5054 vassert(0 == (3 & (HWord)p));
5055 vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
5056 vassert(p[2] == 0xE59CB000);
5057 vassert(p[3] == 0xE29BB001);
5058 vassert(p[4] == 0xE58CB000);
5059 vassert(p[5] == 0xE59CB004);
5060 vassert(p[6] == 0xE2ABB000);
5061 vassert(p[7] == 0xE58CB004);
5062 imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
5063 VexInvalRange vir = {(HWord)p, 8};
5064 return vir;
5065 }
5066
5067
5068 #undef BITS4
5069 #undef X0000
5070 #undef X0001
5071 #undef X0010
5072 #undef X0011
5073 #undef X0100
5074 #undef X0101
5075 #undef X0110
5076 #undef X0111
5077 #undef X1000
5078 #undef X1001
5079 #undef X1010
5080 #undef X1011
5081 #undef X1100
5082 #undef X1101
5083 #undef X1110
5084 #undef X1111
5085 #undef XXXXX___
5086 #undef XXXXXX__
5087 #undef XXX___XX
5088 #undef XXXXX__X
5089 #undef XXXXXXXX
5090 #undef XX______
5091
5092 /*---------------------------------------------------------------*/
5093 /*--- end host_arm_defs.c ---*/
5094 /*---------------------------------------------------------------*/
5095