• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                   host_arm_defs.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    NEON support is
14    Copyright (C) 2010-2015 Samsung Electronics
15    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16               and Kirill Batuzov <batuzovk@ispras.ru>
17 
18    This program is free software; you can redistribute it and/or
19    modify it under the terms of the GNU General Public License as
20    published by the Free Software Foundation; either version 2 of the
21    License, or (at your option) any later version.
22 
23    This program is distributed in the hope that it will be useful, but
24    WITHOUT ANY WARRANTY; without even the implied warranty of
25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26    General Public License for more details.
27 
28    You should have received a copy of the GNU General Public License
29    along with this program; if not, write to the Free Software
30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31    02110-1301, USA.
32 
33    The GNU General Public License is contained in the file COPYING.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39 
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_arm_defs.h"
43 
44 UInt arm_hwcaps = 0;
45 
46 
47 /* --------- Registers. --------- */
48 
getRRegUniverse_ARM(void)49 const RRegUniverse* getRRegUniverse_ARM ( void )
50 {
51    /* The real-register universe is a big constant, so we just want to
52       initialise it once. */
53    static RRegUniverse rRegUniverse_ARM;
54    static Bool         rRegUniverse_ARM_initted = False;
55 
56    /* Handy shorthand, nothing more */
57    RRegUniverse* ru = &rRegUniverse_ARM;
58 
59    /* This isn't thread-safe.  Sigh. */
60    if (LIKELY(rRegUniverse_ARM_initted))
61       return ru;
62 
63    RRegUniverse__init(ru);
64 
65    /* Add the registers.  The initial segment of this array must be
66       those available for allocation by reg-alloc, and those that
67       follow are not available for allocation. */
68 
69    /* Callee saves ones are listed first, since we prefer them
70       if they're available. */
71    ru->regs[ru->size++] = hregARM_R4();
72    ru->regs[ru->size++] = hregARM_R5();
73    ru->regs[ru->size++] = hregARM_R6();
74    ru->regs[ru->size++] = hregARM_R7();
75    ru->regs[ru->size++] = hregARM_R10();
76    ru->regs[ru->size++] = hregARM_R11();
77    /* Otherwise we'll have to slum it out with caller-saves ones. */
78    ru->regs[ru->size++] = hregARM_R0();
79    ru->regs[ru->size++] = hregARM_R1();
80    ru->regs[ru->size++] = hregARM_R2();
81    ru->regs[ru->size++] = hregARM_R3();
82    ru->regs[ru->size++] = hregARM_R9();
83    /* FP registers.  Note: these are all callee-save.  Yay!  Hence we
84       don't need to mention them as trashed in getHRegUsage for
85       ARMInstr_Call. */
86    ru->regs[ru->size++] = hregARM_D8();
87    ru->regs[ru->size++] = hregARM_D9();
88    ru->regs[ru->size++] = hregARM_D10();
89    ru->regs[ru->size++] = hregARM_D11();
90    ru->regs[ru->size++] = hregARM_D12();
91    ru->regs[ru->size++] = hregARM_S26();
92    ru->regs[ru->size++] = hregARM_S27();
93    ru->regs[ru->size++] = hregARM_S28();
94    ru->regs[ru->size++] = hregARM_S29();
95    ru->regs[ru->size++] = hregARM_S30();
96    ru->regs[ru->size++] = hregARM_Q8();
97    ru->regs[ru->size++] = hregARM_Q9();
98    ru->regs[ru->size++] = hregARM_Q10();
99    ru->regs[ru->size++] = hregARM_Q11();
100    ru->regs[ru->size++] = hregARM_Q12();
101    ru->allocable = ru->size;
102 
103    /* And other regs, not available to the allocator. */
104 
105    // unavail: r8 as GSP
106    // r12 is used as a spill/reload temporary
107    // r13 as SP
108    // r14 as LR
109    // r15 as PC
110    //
111    // All in all, we have 11 allocatable integer registers:
112    // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
113    // and r12 dedicated as a spill temporary.
114    // 13 14 and 15 are not under the allocator's control.
115    //
116    // Hence for the allocatable registers we have:
117    //
118    // callee-saved: 4 5 6 7 (8) 9 10 11
119    // caller-saved: 0 1 2 3
120    // Note 9 is ambiguous: the base EABI does not give an e/r-saved
121    // designation for it, but the Linux instantiation of the ABI
122    // specifies it as callee-saved.
123    //
124    // If the set of available registers changes or if the e/r status
125    // changes, be sure to re-check/sync the definition of
126    // getHRegUsage for ARMInstr_Call too.
127    ru->regs[ru->size++] = hregARM_R8();
128    ru->regs[ru->size++] = hregARM_R12();
129    ru->regs[ru->size++] = hregARM_R13();
130    ru->regs[ru->size++] = hregARM_R14();
131    ru->regs[ru->size++] = hregARM_R15();
132    ru->regs[ru->size++] = hregARM_Q13();
133    ru->regs[ru->size++] = hregARM_Q14();
134    ru->regs[ru->size++] = hregARM_Q15();
135 
136    rRegUniverse_ARM_initted = True;
137 
138    RRegUniverse__check_is_sane(ru);
139    return ru;
140 }
141 
142 
void ppHRegARM ( HReg reg )  {
   Int r;
   /* Virtual registers have no ARM-specific spelling; the generic
      printer handles them. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* Real registers print as a class letter followed by the register
      encoding, which is range-checked per class before printing. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("r%d", r);
         break;
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         break;
      case HRcFlt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("s%d", r);
         break;
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("q%d", r);
         break;
      default:
         /* Any other register class is unexpected here. */
         vpanic("ppHRegARM");
   }
}
176 
177 
178 /* --------- Condition codes, ARM encoding. --------- */
179 
showARMCondCode(ARMCondCode cond)180 const HChar* showARMCondCode ( ARMCondCode cond ) {
181    switch (cond) {
182        case ARMcc_EQ:  return "eq";
183        case ARMcc_NE:  return "ne";
184        case ARMcc_HS:  return "hs";
185        case ARMcc_LO:  return "lo";
186        case ARMcc_MI:  return "mi";
187        case ARMcc_PL:  return "pl";
188        case ARMcc_VS:  return "vs";
189        case ARMcc_VC:  return "vc";
190        case ARMcc_HI:  return "hi";
191        case ARMcc_LS:  return "ls";
192        case ARMcc_GE:  return "ge";
193        case ARMcc_LT:  return "lt";
194        case ARMcc_GT:  return "gt";
195        case ARMcc_LE:  return "le";
196        case ARMcc_AL:  return "al"; // default
197        case ARMcc_NV:  return "nv";
198        default: vpanic("showARMCondCode");
199    }
200 }
201 
202 
203 /* --------- Mem AModes: Addressing Mode 1 --------- */
204 
ARMAMode1_RI(HReg reg,Int simm13)205 ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
206    ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
207    am->tag              = ARMam1_RI;
208    am->ARMam1.RI.reg    = reg;
209    am->ARMam1.RI.simm13 = simm13;
210    vassert(-4095 <= simm13 && simm13 <= 4095);
211    return am;
212 }
ARMAMode1_RRS(HReg base,HReg index,UInt shift)213 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
214    ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
215    am->tag              = ARMam1_RRS;
216    am->ARMam1.RRS.base  = base;
217    am->ARMam1.RRS.index = index;
218    am->ARMam1.RRS.shift = shift;
219    vassert(0 <= shift && shift <= 3);
220    return am;
221 }
222 
void ppARMAMode1 ( ARMAMode1* am ) {
   /* Prints "imm(reg)" for the RI form and "(base,index,shift)" for
      the RRS form; any other tag fails the assertion. */
   if (am->tag == ARMam1_RI) {
      vex_printf("%d(", am->ARMam1.RI.simm13);
      ppHRegARM(am->ARMam1.RI.reg);
      vex_printf(")");
   } else if (am->tag == ARMam1_RRS) {
      vex_printf("(");
      ppHRegARM(am->ARMam1.RRS.base);
      vex_printf(",");
      ppHRegARM(am->ARMam1.RRS.index);
      vex_printf(",%u)", am->ARMam1.RRS.shift);
   } else {
      vassert(0);
   }
}
241 
static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   /* Only the RI form is handled: its register is recorded as read. */
   if (am->tag == ARMam1_RI) {
      addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
      return;
   }
   /* The RRS form (base and index would both be reads) is not
      supported here; it and any unknown tag panic. */
   vpanic("addRegUsage_ARMAmode1");
}
255 
static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   /* Only the RI form is handled: its register is replaced by whatever
      the remap table gives for it. */
   if (am->tag == ARMam1_RI) {
      am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
      return;
   }
   /* The RRS form is not supported here; it and any unknown tag
      panic. */
   vpanic("mapRegs_ARMAmode1");
}
269 
270 
271 /* --------- Mem AModes: Addressing Mode 2 --------- */
272 
ARMAMode2_RI(HReg reg,Int simm9)273 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
274    ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
275    am->tag             = ARMam2_RI;
276    am->ARMam2.RI.reg   = reg;
277    am->ARMam2.RI.simm9 = simm9;
278    vassert(-255 <= simm9 && simm9 <= 255);
279    return am;
280 }
ARMAMode2_RR(HReg base,HReg index)281 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
282    ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
283    am->tag             = ARMam2_RR;
284    am->ARMam2.RR.base  = base;
285    am->ARMam2.RR.index = index;
286    return am;
287 }
288 
void ppARMAMode2 ( ARMAMode2* am ) {
   /* Prints "imm(reg)" for the RI form and "(base,index)" for the RR
      form; any other tag fails the assertion. */
   if (am->tag == ARMam2_RI) {
      vex_printf("%d(", am->ARMam2.RI.simm9);
      ppHRegARM(am->ARMam2.RI.reg);
      vex_printf(")");
   } else if (am->tag == ARMam2_RR) {
      vex_printf("(");
      ppHRegARM(am->ARMam2.RR.base);
      vex_printf(",");
      ppHRegARM(am->ARMam2.RR.index);
      vex_printf(")");
   } else {
      vassert(0);
   }
}
307 
static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   /* Only the RI form is handled: its register is recorded as read. */
   if (am->tag == ARMam2_RI) {
      addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
      return;
   }
   /* The RR form (base and index would both be reads) is not
      supported here; it and any unknown tag panic. */
   vpanic("addRegUsage_ARMAmode2");
}
321 
static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   /* Only the RI form is handled: its register is replaced by whatever
      the remap table gives for it. */
   if (am->tag == ARMam2_RI) {
      am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
      return;
   }
   /* The RR form is not supported here; it and any unknown tag
      panic. */
   vpanic("mapRegs_ARMAmode2");
}
335 
336 
337 /* --------- Mem AModes: Addressing Mode VFP --------- */
338 
mkARMAModeV(HReg reg,Int simm11)339 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
340    ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
341    vassert(simm11 >= -1020 && simm11 <= 1020);
342    vassert(0 == (simm11 & 3));
343    am->reg    = reg;
344    am->simm11 = simm11;
345    return am;
346 }
347 
void ppARMAModeV ( ARMAModeV* am ) {
   /* Output has the form "offset(reg)". */
   const Int offset = am->simm11;
   vex_printf("%d(", offset);
   ppHRegARM(am->reg);
   vex_printf(")");
}
353 
static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   /* The amode's single register is only ever read. */
   HReg base = am->reg;
   addHRegUse(u, HRmRead, base);
}
357 
static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   /* Replace the amode's register with its entry in the remap table. */
   HReg mapped = lookupHRegRemap(m, am->reg);
   am->reg = mapped;
}
361 
362 
363 /* --------- Mem AModes: Addressing Mode Neon ------- */
364 
mkARMAModeN_RR(HReg rN,HReg rM)365 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
366    ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
367    am->tag = ARMamN_RR;
368    am->ARMamN.RR.rN = rN;
369    am->ARMamN.RR.rM = rM;
370    return am;
371 }
372 
mkARMAModeN_R(HReg rN)373 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
374    ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
375    am->tag = ARMamN_R;
376    am->ARMamN.R.rN = rN;
377    return am;
378 }
379 
static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   /* Every register in the amode is recorded as read.  Anything that
      is not the single-register form is treated as the RR form. */
   if (am->tag != ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
      return;
   }
   addHRegUse(u, HRmRead, am->ARMamN.R.rN);
}
388 
static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   /* Remap every register in the amode.  Anything that is not the
      single-register form is treated as the RR form. */
   if (am->tag != ARMamN_R) {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
      return;
   }
   am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
}
397 
void ppARMAModeN ( ARMAModeN* am ) {
   /* Prints "[rN]" for the single-register form and "[rN], rM" for
      the RR form. */
   vex_printf("[");
   ppHRegARM(am->tag == ARMamN_R ? am->ARMamN.R.rN : am->ARMamN.RR.rN);
   vex_printf("]");
   if (am->tag != ARMamN_RR)
      return;
   vex_printf(", ");
   ppHRegARM(am->ARMamN.RR.rM);
}
411 
412 
413 /* --------- Reg or imm-8x4 operands --------- */
414 
/* Rotate 'x' right by 'sh' bit positions.  'sh' must be in 0 .. 31.
   It is unsigned, so only the upper bound is asserted; the former
   "sh >= 0" test could never fail. */
static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh < 32);
   /* A zero rotation is handled separately: the general expression
      would shift left by 32, which is not a valid shift amount
      (assuming UInt is 32 bits wide). */
   if (sh == 0)
      return x;
   return (x << (32-sh)) | (x >> sh);
}
422 
ARMRI84_I84(UShort imm8,UShort imm4)423 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
424    ARMRI84* ri84          = LibVEX_Alloc_inline(sizeof(ARMRI84));
425    ri84->tag              = ARMri84_I84;
426    ri84->ARMri84.I84.imm8 = imm8;
427    ri84->ARMri84.I84.imm4 = imm4;
428    vassert(imm8 >= 0 && imm8 <= 255);
429    vassert(imm4 >= 0 && imm4 <= 15);
430    return ri84;
431 }
ARMRI84_R(HReg reg)432 ARMRI84* ARMRI84_R ( HReg reg ) {
433    ARMRI84* ri84       = LibVEX_Alloc_inline(sizeof(ARMRI84));
434    ri84->tag           = ARMri84_R;
435    ri84->ARMri84.R.reg = reg;
436    return ri84;
437 }
438 
void ppARMRI84 ( ARMRI84* ri84 ) {
   /* The immediate form prints its value in hex (imm8 rotated right
      by twice imm4); the register form prints the register.  Any
      other tag fails the assertion. */
   if (ri84->tag == ARMri84_I84) {
      vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                               2 * ri84->ARMri84.I84.imm4));
   } else if (ri84->tag == ARMri84_R) {
      ppHRegARM(ri84->ARMri84.R.reg);
   } else {
      vassert(0);
   }
}
452 
static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   /* An immediate uses no registers. */
   if (ri84->tag == ARMri84_I84)
      return;
   /* The register form reads its register. */
   if (ri84->tag == ARMri84_R) {
      addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
      return;
   }
   vpanic("addRegUsage_ARMRI84");
}
464 
static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   /* An immediate has nothing to remap. */
   if (ri84->tag == ARMri84_I84)
      return;
   /* The register form gets its register looked up in the remap. */
   if (ri84->tag == ARMri84_R) {
      ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
      return;
   }
   vpanic("mapRegs_ARMRI84");
}
476 
477 
478 /* --------- Reg or imm5 operands --------- */
479 
ARMRI5_I5(UInt imm5)480 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
481    ARMRI5* ri5         = LibVEX_Alloc_inline(sizeof(ARMRI5));
482    ri5->tag            = ARMri5_I5;
483    ri5->ARMri5.I5.imm5 = imm5;
484    vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
485    return ri5;
486 }
ARMRI5_R(HReg reg)487 ARMRI5* ARMRI5_R ( HReg reg ) {
488    ARMRI5* ri5       = LibVEX_Alloc_inline(sizeof(ARMRI5));
489    ri5->tag          = ARMri5_R;
490    ri5->ARMri5.R.reg = reg;
491    return ri5;
492 }
493 
void ppARMRI5 ( ARMRI5* ri5 ) {
   /* The immediate form prints in decimal, the register form prints
      the register; any other tag fails the assertion. */
   if (ri5->tag == ARMri5_I5) {
      vex_printf("%u", ri5->ARMri5.I5.imm5);
   } else if (ri5->tag == ARMri5_R) {
      ppHRegARM(ri5->ARMri5.R.reg);
   } else {
      vassert(0);
   }
}
506 
static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   /* An immediate uses no registers. */
   if (ri5->tag == ARMri5_I5)
      return;
   /* The register form reads its register. */
   if (ri5->tag == ARMri5_R) {
      addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
      return;
   }
   vpanic("addRegUsage_ARMRI5");
}
518 
static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   /* An immediate has nothing to remap. */
   if (ri5->tag == ARMri5_I5)
      return;
   /* The register form gets its register looked up in the remap. */
   if (ri5->tag == ARMri5_R) {
      ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
      return;
   }
   vpanic("mapRegs_ARMRI5");
}
530 
/* -------- Neon Immediate operand --------- */
532 
ARMNImm_TI(UInt type,UInt imm8)533 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
534    ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
535    i->type = type;
536    i->imm8 = imm8;
537    return i;
538 }
539 
ARMNImm_to_Imm64(ARMNImm * imm)540 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
541    int i, j;
542    ULong y, x = imm->imm8;
543    switch (imm->type) {
544       case 3:
545          x = x << 8; /* fallthrough */
546       case 2:
547          x = x << 8; /* fallthrough */
548       case 1:
549          x = x << 8; /* fallthrough */
550       case 0:
551          return (x << 32) | x;
552       case 5:
553       case 6:
554          if (imm->type == 5)
555             x = x << 8;
556          else
557             x = (x << 8) | x;
558          /* fallthrough */
559       case 4:
560          x = (x << 16) | x;
561          return (x << 32) | x;
562       case 8:
563          x = (x << 8) | 0xFF;
564          /* fallthrough */
565       case 7:
566          x = (x << 8) | 0xFF;
567          return (x << 32) | x;
568       case 9:
569          x = 0;
570          for (i = 7; i >= 0; i--) {
571             y = ((ULong)imm->imm8 >> i) & 1;
572             for (j = 0; j < 8; j++) {
573                x = (x << 1) | y;
574             }
575          }
576          return x;
577       case 10:
578          x |= (x & 0x80) << 5;
579          x |= (~x & 0x40) << 5;
580          x &= 0x187F; /* 0001 1000 0111 1111 */
581          x |= (x & 0x40) << 4;
582          x |= (x & 0x40) << 3;
583          x |= (x & 0x40) << 2;
584          x |= (x & 0x40) << 1;
585          x = x << 19;
586          x = (x << 32) | x;
587          return x;
588       default:
589          vpanic("ARMNImm_to_Imm64");
590    }
591 }
592 
Imm64_to_ARMNImm(ULong x)593 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
594    ARMNImm tmp;
595    if ((x & 0xFFFFFFFF) == (x >> 32)) {
596       if ((x & 0xFFFFFF00) == 0)
597          return ARMNImm_TI(0, x & 0xFF);
598       if ((x & 0xFFFF00FF) == 0)
599          return ARMNImm_TI(1, (x >> 8) & 0xFF);
600       if ((x & 0xFF00FFFF) == 0)
601          return ARMNImm_TI(2, (x >> 16) & 0xFF);
602       if ((x & 0x00FFFFFF) == 0)
603          return ARMNImm_TI(3, (x >> 24) & 0xFF);
604       if ((x & 0xFFFF00FF) == 0xFF)
605          return ARMNImm_TI(7, (x >> 8) & 0xFF);
606       if ((x & 0xFF00FFFF) == 0xFFFF)
607          return ARMNImm_TI(8, (x >> 16) & 0xFF);
608       if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
609          if ((x & 0xFF00) == 0)
610             return ARMNImm_TI(4, x & 0xFF);
611          if ((x & 0x00FF) == 0)
612             return ARMNImm_TI(5, (x >> 8) & 0xFF);
613          if ((x & 0xFF) == ((x >> 8) & 0xFF))
614             return ARMNImm_TI(6, x & 0xFF);
615       }
616       if ((x & 0x7FFFF) == 0) {
617          tmp.type = 10;
618          tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
619          if (ARMNImm_to_Imm64(&tmp) == x)
620             return ARMNImm_TI(tmp.type, tmp.imm8);
621       }
622    } else {
623       /* This can only be type 9. */
624       tmp.imm8 = (((x >> 56) & 1) << 7)
625                | (((x >> 48) & 1) << 6)
626                | (((x >> 40) & 1) << 5)
627                | (((x >> 32) & 1) << 4)
628                | (((x >> 24) & 1) << 3)
629                | (((x >> 16) & 1) << 2)
630                | (((x >>  8) & 1) << 1)
631                | (((x >>  0) & 1) << 0);
632       tmp.type = 9;
633       if (ARMNImm_to_Imm64 (&tmp) == x)
634          return ARMNImm_TI(tmp.type, tmp.imm8);
635    }
636    return NULL;
637 }
638 
void ppARMNImm (ARMNImm* i) {
   /* The expanded 64-bit pattern is printed twice, back to back, in
      upper-case hex without zero padding. */
   const ULong expanded = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", expanded, expanded);
}
643 
644 /* -- Register or scalar operand --- */
645 
mkARMNRS(ARMNRS_tag tag,HReg reg,UInt index)646 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
647 {
648    ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
649    p->tag = tag;
650    p->reg = reg;
651    p->index = index;
652    return p;
653 }
654 
void ppARMNRS(ARMNRS *p)
{
   /* Always print the register; a scalar operand additionally gets
      its lane index in square brackets. */
   ppHRegARM(p->reg);
   if (p->tag != ARMNRS_Scalar)
      return;
   vex_printf("[%u]", p->index);
}
662 
663 /* --------- Instructions. --------- */
664 
showARMAluOp(ARMAluOp op)665 const HChar* showARMAluOp ( ARMAluOp op ) {
666    switch (op) {
667       case ARMalu_ADD:  return "add";
668       case ARMalu_ADDS: return "adds";
669       case ARMalu_ADC:  return "adc";
670       case ARMalu_SUB:  return "sub";
671       case ARMalu_SUBS: return "subs";
672       case ARMalu_SBC:  return "sbc";
673       case ARMalu_AND:  return "and";
674       case ARMalu_BIC:  return "bic";
675       case ARMalu_OR:   return "orr";
676       case ARMalu_XOR:  return "xor";
677       default: vpanic("showARMAluOp");
678    }
679 }
680 
showARMShiftOp(ARMShiftOp op)681 const HChar* showARMShiftOp ( ARMShiftOp op ) {
682    switch (op) {
683       case ARMsh_SHL: return "shl";
684       case ARMsh_SHR: return "shr";
685       case ARMsh_SAR: return "sar";
686       default: vpanic("showARMShiftOp");
687    }
688 }
689 
showARMUnaryOp(ARMUnaryOp op)690 const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
691    switch (op) {
692       case ARMun_NEG: return "neg";
693       case ARMun_NOT: return "not";
694       case ARMun_CLZ: return "clz";
695       default: vpanic("showARMUnaryOp");
696    }
697 }
698 
showARMMulOp(ARMMulOp op)699 const HChar* showARMMulOp ( ARMMulOp op ) {
700    switch (op) {
701       case ARMmul_PLAIN: return "mul";
702       case ARMmul_ZX:    return "umull";
703       case ARMmul_SX:    return "smull";
704       default: vpanic("showARMMulOp");
705    }
706 }
707 
showARMVfpOp(ARMVfpOp op)708 const HChar* showARMVfpOp ( ARMVfpOp op ) {
709    switch (op) {
710       case ARMvfp_ADD: return "add";
711       case ARMvfp_SUB: return "sub";
712       case ARMvfp_MUL: return "mul";
713       case ARMvfp_DIV: return "div";
714       default: vpanic("showARMVfpOp");
715    }
716 }
717 
showARMVfpUnaryOp(ARMVfpUnaryOp op)718 const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
719    switch (op) {
720       case ARMvfpu_COPY: return "cpy";
721       case ARMvfpu_NEG:  return "neg";
722       case ARMvfpu_ABS:  return "abs";
723       case ARMvfpu_SQRT: return "sqrt";
724       default: vpanic("showARMVfpUnaryOp");
725    }
726 }
727 
showARMNeonBinOp(ARMNeonBinOp op)728 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
729    switch (op) {
730       case ARMneon_VAND: return "vand";
731       case ARMneon_VORR: return "vorr";
732       case ARMneon_VXOR: return "veor";
733       case ARMneon_VADD: return "vadd";
734       case ARMneon_VRHADDS: return "vrhadd";
735       case ARMneon_VRHADDU: return "vrhadd";
736       case ARMneon_VADDFP: return "vadd";
737       case ARMneon_VPADDFP: return "vpadd";
738       case ARMneon_VABDFP: return "vabd";
739       case ARMneon_VSUB: return "vsub";
740       case ARMneon_VSUBFP: return "vsub";
741       case ARMneon_VMINU: return "vmin";
742       case ARMneon_VMINS: return "vmin";
743       case ARMneon_VMINF: return "vmin";
744       case ARMneon_VMAXU: return "vmax";
745       case ARMneon_VMAXS: return "vmax";
746       case ARMneon_VMAXF: return "vmax";
747       case ARMneon_VQADDU: return "vqadd";
748       case ARMneon_VQADDS: return "vqadd";
749       case ARMneon_VQSUBU: return "vqsub";
750       case ARMneon_VQSUBS: return "vqsub";
751       case ARMneon_VCGTU:  return "vcgt";
752       case ARMneon_VCGTS:  return "vcgt";
753       case ARMneon_VCGTF:  return "vcgt";
754       case ARMneon_VCGEF:  return "vcgt";
755       case ARMneon_VCGEU:  return "vcge";
756       case ARMneon_VCGES:  return "vcge";
757       case ARMneon_VCEQ:  return "vceq";
758       case ARMneon_VCEQF:  return "vceq";
759       case ARMneon_VPADD:   return "vpadd";
760       case ARMneon_VPMINU:   return "vpmin";
761       case ARMneon_VPMINS:   return "vpmin";
762       case ARMneon_VPMINF:   return "vpmin";
763       case ARMneon_VPMAXU:   return "vpmax";
764       case ARMneon_VPMAXS:   return "vpmax";
765       case ARMneon_VPMAXF:   return "vpmax";
766       case ARMneon_VEXT:   return "vext";
767       case ARMneon_VMUL:   return "vmuli";
768       case ARMneon_VMULLU:   return "vmull";
769       case ARMneon_VMULLS:   return "vmull";
770       case ARMneon_VMULP:  return "vmul";
771       case ARMneon_VMULFP:  return "vmul";
772       case ARMneon_VMULLP:  return "vmul";
773       case ARMneon_VQDMULH: return "vqdmulh";
774       case ARMneon_VQRDMULH: return "vqrdmulh";
775       case ARMneon_VQDMULL: return "vqdmull";
776       case ARMneon_VTBL: return "vtbl";
777       case ARMneon_VRECPS: return "vrecps";
778       case ARMneon_VRSQRTS: return "vrecps";
779       case ARMneon_INVALID: return "??invalid??";
780       /* ... */
781       default: vpanic("showARMNeonBinOp");
782    }
783 }
784 
showARMNeonBinOpDataType(ARMNeonBinOp op)785 const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
786    switch (op) {
787       case ARMneon_VAND:
788       case ARMneon_VORR:
789       case ARMneon_VXOR:
790          return "";
791       case ARMneon_VADD:
792       case ARMneon_VSUB:
793       case ARMneon_VEXT:
794       case ARMneon_VMUL:
795       case ARMneon_VPADD:
796       case ARMneon_VTBL:
797       case ARMneon_VCEQ:
798          return ".i";
799       case ARMneon_VRHADDU:
800       case ARMneon_VMINU:
801       case ARMneon_VMAXU:
802       case ARMneon_VQADDU:
803       case ARMneon_VQSUBU:
804       case ARMneon_VCGTU:
805       case ARMneon_VCGEU:
806       case ARMneon_VMULLU:
807       case ARMneon_VPMINU:
808       case ARMneon_VPMAXU:
809          return ".u";
810       case ARMneon_VRHADDS:
811       case ARMneon_VMINS:
812       case ARMneon_VMAXS:
813       case ARMneon_VQADDS:
814       case ARMneon_VQSUBS:
815       case ARMneon_VCGTS:
816       case ARMneon_VCGES:
817       case ARMneon_VQDMULL:
818       case ARMneon_VMULLS:
819       case ARMneon_VPMINS:
820       case ARMneon_VPMAXS:
821       case ARMneon_VQDMULH:
822       case ARMneon_VQRDMULH:
823          return ".s";
824       case ARMneon_VMULP:
825       case ARMneon_VMULLP:
826          return ".p";
827       case ARMneon_VADDFP:
828       case ARMneon_VABDFP:
829       case ARMneon_VPADDFP:
830       case ARMneon_VSUBFP:
831       case ARMneon_VMULFP:
832       case ARMneon_VMINF:
833       case ARMneon_VMAXF:
834       case ARMneon_VPMINF:
835       case ARMneon_VPMAXF:
836       case ARMneon_VCGTF:
837       case ARMneon_VCGEF:
838       case ARMneon_VCEQF:
839       case ARMneon_VRECPS:
840       case ARMneon_VRSQRTS:
841          return ".f";
842       /* ... */
843       default: vpanic("showARMNeonBinOpDataType");
844    }
845 }
846 
showARMNeonUnOp(ARMNeonUnOp op)847 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
848    switch (op) {
849       case ARMneon_COPY: return "vmov";
850       case ARMneon_COPYLS: return "vmov";
851       case ARMneon_COPYLU: return "vmov";
852       case ARMneon_COPYN: return "vmov";
853       case ARMneon_COPYQNSS: return "vqmovn";
854       case ARMneon_COPYQNUS: return "vqmovun";
855       case ARMneon_COPYQNUU: return "vqmovn";
856       case ARMneon_NOT: return "vmvn";
857       case ARMneon_EQZ: return "vceq";
858       case ARMneon_CNT: return "vcnt";
859       case ARMneon_CLS: return "vcls";
860       case ARMneon_CLZ: return "vclz";
861       case ARMneon_DUP: return "vdup";
862       case ARMneon_PADDLS: return "vpaddl";
863       case ARMneon_PADDLU: return "vpaddl";
864       case ARMneon_VQSHLNSS: return "vqshl";
865       case ARMneon_VQSHLNUU: return "vqshl";
866       case ARMneon_VQSHLNUS: return "vqshlu";
867       case ARMneon_REV16: return "vrev16";
868       case ARMneon_REV32: return "vrev32";
869       case ARMneon_REV64: return "vrev64";
870       case ARMneon_VCVTFtoU: return "vcvt";
871       case ARMneon_VCVTFtoS: return "vcvt";
872       case ARMneon_VCVTUtoF: return "vcvt";
873       case ARMneon_VCVTStoF: return "vcvt";
874       case ARMneon_VCVTFtoFixedU: return "vcvt";
875       case ARMneon_VCVTFtoFixedS: return "vcvt";
876       case ARMneon_VCVTFixedUtoF: return "vcvt";
877       case ARMneon_VCVTFixedStoF: return "vcvt";
878       case ARMneon_VCVTF32toF16: return "vcvt";
879       case ARMneon_VCVTF16toF32: return "vcvt";
880       case ARMneon_VRECIP: return "vrecip";
881       case ARMneon_VRECIPF: return "vrecipf";
882       case ARMneon_VNEGF: return "vneg";
883       case ARMneon_ABS: return "vabs";
884       case ARMneon_VABSFP: return "vabsfp";
885       case ARMneon_VRSQRTEFP: return "vrsqrtefp";
886       case ARMneon_VRSQRTE: return "vrsqrte";
887       /* ... */
888       default: vpanic("showARMNeonUnOp");
889    }
890 }
891 
showARMNeonUnOpDataType(ARMNeonUnOp op)892 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
893    switch (op) {
894       case ARMneon_COPY:
895       case ARMneon_NOT:
896          return "";
897       case ARMneon_COPYN:
898       case ARMneon_EQZ:
899       case ARMneon_CNT:
900       case ARMneon_DUP:
901       case ARMneon_REV16:
902       case ARMneon_REV32:
903       case ARMneon_REV64:
904          return ".i";
905       case ARMneon_COPYLU:
906       case ARMneon_PADDLU:
907       case ARMneon_COPYQNUU:
908       case ARMneon_VQSHLNUU:
909       case ARMneon_VRECIP:
910       case ARMneon_VRSQRTE:
911          return ".u";
912       case ARMneon_CLS:
913       case ARMneon_CLZ:
914       case ARMneon_COPYLS:
915       case ARMneon_PADDLS:
916       case ARMneon_COPYQNSS:
917       case ARMneon_COPYQNUS:
918       case ARMneon_VQSHLNSS:
919       case ARMneon_VQSHLNUS:
920       case ARMneon_ABS:
921          return ".s";
922       case ARMneon_VRECIPF:
923       case ARMneon_VNEGF:
924       case ARMneon_VABSFP:
925       case ARMneon_VRSQRTEFP:
926          return ".f";
927       case ARMneon_VCVTFtoU: return ".u32.f32";
928       case ARMneon_VCVTFtoS: return ".s32.f32";
929       case ARMneon_VCVTUtoF: return ".f32.u32";
930       case ARMneon_VCVTStoF: return ".f32.s32";
931       case ARMneon_VCVTF16toF32: return ".f32.f16";
932       case ARMneon_VCVTF32toF16: return ".f16.f32";
933       case ARMneon_VCVTFtoFixedU: return ".u32.f32";
934       case ARMneon_VCVTFtoFixedS: return ".s32.f32";
935       case ARMneon_VCVTFixedUtoF: return ".f32.u32";
936       case ARMneon_VCVTFixedStoF: return ".f32.s32";
937       /* ... */
938       default: vpanic("showARMNeonUnOpDataType");
939    }
940 }
941 
showARMNeonUnOpS(ARMNeonUnOpS op)942 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
943    switch (op) {
944       case ARMneon_SETELEM: return "vmov";
945       case ARMneon_GETELEMU: return "vmov";
946       case ARMneon_GETELEMS: return "vmov";
947       case ARMneon_VDUP: return "vdup";
948       /* ... */
949       default: vpanic("showARMNeonUnarySOp");
950    }
951 }
952 
showARMNeonUnOpSDataType(ARMNeonUnOpS op)953 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
954    switch (op) {
955       case ARMneon_SETELEM:
956       case ARMneon_VDUP:
957          return ".i";
958       case ARMneon_GETELEMS:
959          return ".s";
960       case ARMneon_GETELEMU:
961          return ".u";
962       /* ... */
963       default: vpanic("showARMNeonUnarySOp");
964    }
965 }
966 
showARMNeonShiftOp(ARMNeonShiftOp op)967 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
968    switch (op) {
969       case ARMneon_VSHL: return "vshl";
970       case ARMneon_VSAL: return "vshl";
971       case ARMneon_VQSHL: return "vqshl";
972       case ARMneon_VQSAL: return "vqshl";
973       /* ... */
974       default: vpanic("showARMNeonShiftOp");
975    }
976 }
977 
showARMNeonShiftOpDataType(ARMNeonShiftOp op)978 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
979    switch (op) {
980       case ARMneon_VSHL:
981       case ARMneon_VQSHL:
982          return ".u";
983       case ARMneon_VSAL:
984       case ARMneon_VQSAL:
985          return ".s";
986       /* ... */
987       default: vpanic("showARMNeonShiftOpDataType");
988    }
989 }
990 
showARMNeonDualOp(ARMNeonDualOp op)991 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
992    switch (op) {
993       case ARMneon_TRN: return "vtrn";
994       case ARMneon_ZIP: return "vzip";
995       case ARMneon_UZP: return "vuzp";
996       /* ... */
997       default: vpanic("showARMNeonDualOp");
998    }
999 }
1000 
showARMNeonDualOpDataType(ARMNeonDualOp op)1001 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1002    switch (op) {
1003       case ARMneon_TRN:
1004       case ARMneon_ZIP:
1005       case ARMneon_UZP:
1006          return "i";
1007       /* ... */
1008       default: vpanic("showARMNeonDualOp");
1009    }
1010 }
1011 
showARMNeonDataSize_wrk(UInt size)1012 static const HChar* showARMNeonDataSize_wrk ( UInt size )
1013 {
1014    switch (size) {
1015       case 0: return "8";
1016       case 1: return "16";
1017       case 2: return "32";
1018       case 3: return "64";
1019       default: vpanic("showARMNeonDataSize");
1020    }
1021 }
1022 
showARMNeonDataSize(const ARMInstr * i)1023 static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1024 {
1025    switch (i->tag) {
1026       case ARMin_NBinary:
1027          if (i->ARMin.NBinary.op == ARMneon_VEXT)
1028             return "8";
1029          if (i->ARMin.NBinary.op == ARMneon_VAND ||
1030              i->ARMin.NBinary.op == ARMneon_VORR ||
1031              i->ARMin.NBinary.op == ARMneon_VXOR)
1032             return "";
1033          return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1034       case ARMin_NUnary:
1035          if (i->ARMin.NUnary.op == ARMneon_COPY ||
1036              i->ARMin.NUnary.op == ARMneon_NOT ||
1037              i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1038              i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1039              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1040              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1041              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1042              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1043              i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1044              i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1045              i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1046              i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1047             return "";
1048          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1049              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1050              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1051             UInt size;
1052             size = i->ARMin.NUnary.size;
1053             if (size & 0x40)
1054                return "64";
1055             if (size & 0x20)
1056                return "32";
1057             if (size & 0x10)
1058                return "16";
1059             if (size & 0x08)
1060                return "8";
1061             vpanic("showARMNeonDataSize");
1062          }
1063          return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1064       case ARMin_NUnaryS:
1065          if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1066             int size;
1067             size = i->ARMin.NUnaryS.size;
1068             if ((size & 1) == 1)
1069                return "8";
1070             if ((size & 3) == 2)
1071                return "16";
1072             if ((size & 7) == 4)
1073                return "32";
1074             vpanic("showARMNeonDataSize");
1075          }
1076          return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1077       case ARMin_NShift:
1078          return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1079       case ARMin_NDual:
1080          return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1081       default:
1082          vpanic("showARMNeonDataSize");
1083    }
1084 }
1085 
ARMInstr_Alu(ARMAluOp op,HReg dst,HReg argL,ARMRI84 * argR)1086 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1087                          HReg dst, HReg argL, ARMRI84* argR ) {
1088    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1089    i->tag            = ARMin_Alu;
1090    i->ARMin.Alu.op   = op;
1091    i->ARMin.Alu.dst  = dst;
1092    i->ARMin.Alu.argL = argL;
1093    i->ARMin.Alu.argR = argR;
1094    return i;
1095 }
ARMInstr_Shift(ARMShiftOp op,HReg dst,HReg argL,ARMRI5 * argR)1096 ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
1097                             HReg dst, HReg argL, ARMRI5* argR ) {
1098    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1099    i->tag              = ARMin_Shift;
1100    i->ARMin.Shift.op   = op;
1101    i->ARMin.Shift.dst  = dst;
1102    i->ARMin.Shift.argL = argL;
1103    i->ARMin.Shift.argR = argR;
1104    return i;
1105 }
ARMInstr_Unary(ARMUnaryOp op,HReg dst,HReg src)1106 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1107    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1108    i->tag             = ARMin_Unary;
1109    i->ARMin.Unary.op  = op;
1110    i->ARMin.Unary.dst = dst;
1111    i->ARMin.Unary.src = src;
1112    return i;
1113 }
ARMInstr_CmpOrTst(Bool isCmp,HReg argL,ARMRI84 * argR)1114 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1115    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1116    i->tag                  = ARMin_CmpOrTst;
1117    i->ARMin.CmpOrTst.isCmp = isCmp;
1118    i->ARMin.CmpOrTst.argL  = argL;
1119    i->ARMin.CmpOrTst.argR  = argR;
1120    return i;
1121 }
ARMInstr_Mov(HReg dst,ARMRI84 * src)1122 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1123    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1124    i->tag           = ARMin_Mov;
1125    i->ARMin.Mov.dst = dst;
1126    i->ARMin.Mov.src = src;
1127    return i;
1128 }
ARMInstr_Imm32(HReg dst,UInt imm32)1129 ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
1130    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1131    i->tag               = ARMin_Imm32;
1132    i->ARMin.Imm32.dst   = dst;
1133    i->ARMin.Imm32.imm32 = imm32;
1134    return i;
1135 }
ARMInstr_LdSt32(ARMCondCode cc,Bool isLoad,HReg rD,ARMAMode1 * amode)1136 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1137                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
1138    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1139    i->tag                 = ARMin_LdSt32;
1140    i->ARMin.LdSt32.cc     = cc;
1141    i->ARMin.LdSt32.isLoad = isLoad;
1142    i->ARMin.LdSt32.rD     = rD;
1143    i->ARMin.LdSt32.amode  = amode;
1144    vassert(cc != ARMcc_NV);
1145    return i;
1146 }
ARMInstr_LdSt16(ARMCondCode cc,Bool isLoad,Bool signedLoad,HReg rD,ARMAMode2 * amode)1147 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1148                             Bool isLoad, Bool signedLoad,
1149                             HReg rD, ARMAMode2* amode ) {
1150    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1151    i->tag                     = ARMin_LdSt16;
1152    i->ARMin.LdSt16.cc         = cc;
1153    i->ARMin.LdSt16.isLoad     = isLoad;
1154    i->ARMin.LdSt16.signedLoad = signedLoad;
1155    i->ARMin.LdSt16.rD         = rD;
1156    i->ARMin.LdSt16.amode      = amode;
1157    vassert(cc != ARMcc_NV);
1158    return i;
1159 }
ARMInstr_LdSt8U(ARMCondCode cc,Bool isLoad,HReg rD,ARMAMode1 * amode)1160 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1161                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
1162    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1163    i->tag                 = ARMin_LdSt8U;
1164    i->ARMin.LdSt8U.cc     = cc;
1165    i->ARMin.LdSt8U.isLoad = isLoad;
1166    i->ARMin.LdSt8U.rD     = rD;
1167    i->ARMin.LdSt8U.amode  = amode;
1168    vassert(cc != ARMcc_NV);
1169    return i;
1170 }
ARMInstr_Ld8S(ARMCondCode cc,HReg rD,ARMAMode2 * amode)1171 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1172    ARMInstr* i         = LibVEX_Alloc_inline(sizeof(ARMInstr));
1173    i->tag              = ARMin_Ld8S;
1174    i->ARMin.Ld8S.cc    = cc;
1175    i->ARMin.Ld8S.rD    = rD;
1176    i->ARMin.Ld8S.amode = amode;
1177    vassert(cc != ARMcc_NV);
1178    return i;
1179 }
ARMInstr_XDirect(Addr32 dstGA,ARMAMode1 * amR15T,ARMCondCode cond,Bool toFastEP)1180 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1181                              ARMCondCode cond, Bool toFastEP ) {
1182    ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1183    i->tag                    = ARMin_XDirect;
1184    i->ARMin.XDirect.dstGA    = dstGA;
1185    i->ARMin.XDirect.amR15T   = amR15T;
1186    i->ARMin.XDirect.cond     = cond;
1187    i->ARMin.XDirect.toFastEP = toFastEP;
1188    return i;
1189 }
ARMInstr_XIndir(HReg dstGA,ARMAMode1 * amR15T,ARMCondCode cond)1190 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1191                             ARMCondCode cond ) {
1192    ARMInstr* i            = LibVEX_Alloc_inline(sizeof(ARMInstr));
1193    i->tag                 = ARMin_XIndir;
1194    i->ARMin.XIndir.dstGA  = dstGA;
1195    i->ARMin.XIndir.amR15T = amR15T;
1196    i->ARMin.XIndir.cond   = cond;
1197    return i;
1198 }
ARMInstr_XAssisted(HReg dstGA,ARMAMode1 * amR15T,ARMCondCode cond,IRJumpKind jk)1199 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1200                                ARMCondCode cond, IRJumpKind jk ) {
1201    ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1202    i->tag                    = ARMin_XAssisted;
1203    i->ARMin.XAssisted.dstGA  = dstGA;
1204    i->ARMin.XAssisted.amR15T = amR15T;
1205    i->ARMin.XAssisted.cond   = cond;
1206    i->ARMin.XAssisted.jk     = jk;
1207    return i;
1208 }
ARMInstr_CMov(ARMCondCode cond,HReg dst,ARMRI84 * src)1209 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1210    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1211    i->tag             = ARMin_CMov;
1212    i->ARMin.CMov.cond = cond;
1213    i->ARMin.CMov.dst  = dst;
1214    i->ARMin.CMov.src  = src;
1215    vassert(cond != ARMcc_AL);
1216    return i;
1217 }
ARMInstr_Call(ARMCondCode cond,Addr32 target,Int nArgRegs,RetLoc rloc)1218 ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1219                           RetLoc rloc ) {
1220    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1221    i->tag                 = ARMin_Call;
1222    i->ARMin.Call.cond     = cond;
1223    i->ARMin.Call.target   = target;
1224    i->ARMin.Call.nArgRegs = nArgRegs;
1225    i->ARMin.Call.rloc     = rloc;
1226    vassert(is_sane_RetLoc(rloc));
1227    return i;
1228 }
ARMInstr_Mul(ARMMulOp op)1229 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1230    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1231    i->tag          = ARMin_Mul;
1232    i->ARMin.Mul.op = op;
1233    return i;
1234 }
ARMInstr_LdrEX(Int szB)1235 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1236    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237    i->tag             = ARMin_LdrEX;
1238    i->ARMin.LdrEX.szB = szB;
1239    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1240    return i;
1241 }
ARMInstr_StrEX(Int szB)1242 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1243    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1244    i->tag             = ARMin_StrEX;
1245    i->ARMin.StrEX.szB = szB;
1246    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1247    return i;
1248 }
ARMInstr_VLdStD(Bool isLoad,HReg dD,ARMAModeV * am)1249 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1250    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1251    i->tag                 = ARMin_VLdStD;
1252    i->ARMin.VLdStD.isLoad = isLoad;
1253    i->ARMin.VLdStD.dD     = dD;
1254    i->ARMin.VLdStD.amode  = am;
1255    return i;
1256 }
ARMInstr_VLdStS(Bool isLoad,HReg fD,ARMAModeV * am)1257 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1258    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1259    i->tag                 = ARMin_VLdStS;
1260    i->ARMin.VLdStS.isLoad = isLoad;
1261    i->ARMin.VLdStS.fD     = fD;
1262    i->ARMin.VLdStS.amode  = am;
1263    return i;
1264 }
ARMInstr_VAluD(ARMVfpOp op,HReg dst,HReg argL,HReg argR)1265 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1266    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1267    i->tag              = ARMin_VAluD;
1268    i->ARMin.VAluD.op   = op;
1269    i->ARMin.VAluD.dst  = dst;
1270    i->ARMin.VAluD.argL = argL;
1271    i->ARMin.VAluD.argR = argR;
1272    return i;
1273 }
ARMInstr_VAluS(ARMVfpOp op,HReg dst,HReg argL,HReg argR)1274 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1275    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1276    i->tag              = ARMin_VAluS;
1277    i->ARMin.VAluS.op   = op;
1278    i->ARMin.VAluS.dst  = dst;
1279    i->ARMin.VAluS.argL = argL;
1280    i->ARMin.VAluS.argR = argR;
1281    return i;
1282 }
ARMInstr_VUnaryD(ARMVfpUnaryOp op,HReg dst,HReg src)1283 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1284    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1285    i->tag               = ARMin_VUnaryD;
1286    i->ARMin.VUnaryD.op  = op;
1287    i->ARMin.VUnaryD.dst = dst;
1288    i->ARMin.VUnaryD.src = src;
1289    return i;
1290 }
ARMInstr_VUnaryS(ARMVfpUnaryOp op,HReg dst,HReg src)1291 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1292    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1293    i->tag               = ARMin_VUnaryS;
1294    i->ARMin.VUnaryS.op  = op;
1295    i->ARMin.VUnaryS.dst = dst;
1296    i->ARMin.VUnaryS.src = src;
1297    return i;
1298 }
ARMInstr_VCmpD(HReg argL,HReg argR)1299 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1300    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1301    i->tag              = ARMin_VCmpD;
1302    i->ARMin.VCmpD.argL = argL;
1303    i->ARMin.VCmpD.argR = argR;
1304    return i;
1305 }
ARMInstr_VCMovD(ARMCondCode cond,HReg dst,HReg src)1306 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1307    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1308    i->tag               = ARMin_VCMovD;
1309    i->ARMin.VCMovD.cond = cond;
1310    i->ARMin.VCMovD.dst  = dst;
1311    i->ARMin.VCMovD.src  = src;
1312    vassert(cond != ARMcc_AL);
1313    return i;
1314 }
ARMInstr_VCMovS(ARMCondCode cond,HReg dst,HReg src)1315 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1316    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1317    i->tag               = ARMin_VCMovS;
1318    i->ARMin.VCMovS.cond = cond;
1319    i->ARMin.VCMovS.dst  = dst;
1320    i->ARMin.VCMovS.src  = src;
1321    vassert(cond != ARMcc_AL);
1322    return i;
1323 }
ARMInstr_VCvtSD(Bool sToD,HReg dst,HReg src)1324 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1325    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1326    i->tag               = ARMin_VCvtSD;
1327    i->ARMin.VCvtSD.sToD = sToD;
1328    i->ARMin.VCvtSD.dst  = dst;
1329    i->ARMin.VCvtSD.src  = src;
1330    return i;
1331 }
ARMInstr_VXferD(Bool toD,HReg dD,HReg rHi,HReg rLo)1332 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1333    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1334    i->tag              = ARMin_VXferD;
1335    i->ARMin.VXferD.toD = toD;
1336    i->ARMin.VXferD.dD  = dD;
1337    i->ARMin.VXferD.rHi = rHi;
1338    i->ARMin.VXferD.rLo = rLo;
1339    return i;
1340 }
ARMInstr_VXferS(Bool toS,HReg fD,HReg rLo)1341 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1342    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1343    i->tag              = ARMin_VXferS;
1344    i->ARMin.VXferS.toS = toS;
1345    i->ARMin.VXferS.fD  = fD;
1346    i->ARMin.VXferS.rLo = rLo;
1347    return i;
1348 }
ARMInstr_VCvtID(Bool iToD,Bool syned,HReg dst,HReg src)1349 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1350                             HReg dst, HReg src ) {
1351    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1352    i->tag                = ARMin_VCvtID;
1353    i->ARMin.VCvtID.iToD  = iToD;
1354    i->ARMin.VCvtID.syned = syned;
1355    i->ARMin.VCvtID.dst   = dst;
1356    i->ARMin.VCvtID.src   = src;
1357    return i;
1358 }
ARMInstr_FPSCR(Bool toFPSCR,HReg iReg)1359 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1360    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1361    i->tag                 = ARMin_FPSCR;
1362    i->ARMin.FPSCR.toFPSCR = toFPSCR;
1363    i->ARMin.FPSCR.iReg    = iReg;
1364    return i;
1365 }
ARMInstr_MFence(void)1366 ARMInstr* ARMInstr_MFence ( void ) {
1367    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1368    i->tag      = ARMin_MFence;
1369    return i;
1370 }
ARMInstr_CLREX(void)1371 ARMInstr* ARMInstr_CLREX( void ) {
1372    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1373    i->tag      = ARMin_CLREX;
1374    return i;
1375 }
1376 
ARMInstr_NLdStQ(Bool isLoad,HReg dQ,ARMAModeN * amode)1377 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1378    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1379    i->tag                  = ARMin_NLdStQ;
1380    i->ARMin.NLdStQ.isLoad  = isLoad;
1381    i->ARMin.NLdStQ.dQ      = dQ;
1382    i->ARMin.NLdStQ.amode   = amode;
1383    return i;
1384 }
1385 
ARMInstr_NLdStD(Bool isLoad,HReg dD,ARMAModeN * amode)1386 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1387    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1388    i->tag                  = ARMin_NLdStD;
1389    i->ARMin.NLdStD.isLoad  = isLoad;
1390    i->ARMin.NLdStD.dD      = dD;
1391    i->ARMin.NLdStD.amode   = amode;
1392    return i;
1393 }
1394 
ARMInstr_NUnary(ARMNeonUnOp op,HReg dQ,HReg nQ,UInt size,Bool Q)1395 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1396                             UInt size, Bool Q ) {
1397    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1398    i->tag                = ARMin_NUnary;
1399    i->ARMin.NUnary.op   = op;
1400    i->ARMin.NUnary.src  = nQ;
1401    i->ARMin.NUnary.dst  = dQ;
1402    i->ARMin.NUnary.size = size;
1403    i->ARMin.NUnary.Q    = Q;
1404    return i;
1405 }
1406 
ARMInstr_NUnaryS(ARMNeonUnOpS op,ARMNRS * dst,ARMNRS * src,UInt size,Bool Q)1407 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1408                              UInt size, Bool Q ) {
1409    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1410    i->tag                = ARMin_NUnaryS;
1411    i->ARMin.NUnaryS.op   = op;
1412    i->ARMin.NUnaryS.src  = src;
1413    i->ARMin.NUnaryS.dst  = dst;
1414    i->ARMin.NUnaryS.size = size;
1415    i->ARMin.NUnaryS.Q    = Q;
1416    return i;
1417 }
1418 
ARMInstr_NDual(ARMNeonDualOp op,HReg nQ,HReg mQ,UInt size,Bool Q)1419 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1420                            UInt size, Bool Q ) {
1421    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1422    i->tag                = ARMin_NDual;
1423    i->ARMin.NDual.op   = op;
1424    i->ARMin.NDual.arg1 = nQ;
1425    i->ARMin.NDual.arg2 = mQ;
1426    i->ARMin.NDual.size = size;
1427    i->ARMin.NDual.Q    = Q;
1428    return i;
1429 }
1430 
ARMInstr_NBinary(ARMNeonBinOp op,HReg dst,HReg argL,HReg argR,UInt size,Bool Q)1431 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1432                              HReg dst, HReg argL, HReg argR,
1433                              UInt size, Bool Q ) {
1434    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1435    i->tag                = ARMin_NBinary;
1436    i->ARMin.NBinary.op   = op;
1437    i->ARMin.NBinary.argL = argL;
1438    i->ARMin.NBinary.argR = argR;
1439    i->ARMin.NBinary.dst  = dst;
1440    i->ARMin.NBinary.size = size;
1441    i->ARMin.NBinary.Q    = Q;
1442    return i;
1443 }
1444 
ARMInstr_NeonImm(HReg dst,ARMNImm * imm)1445 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1446    ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1447    i->tag         = ARMin_NeonImm;
1448    i->ARMin.NeonImm.dst = dst;
1449    i->ARMin.NeonImm.imm = imm;
1450    return i;
1451 }
1452 
ARMInstr_NCMovQ(ARMCondCode cond,HReg dst,HReg src)1453 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1454    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1455    i->tag               = ARMin_NCMovQ;
1456    i->ARMin.NCMovQ.cond = cond;
1457    i->ARMin.NCMovQ.dst  = dst;
1458    i->ARMin.NCMovQ.src  = src;
1459    vassert(cond != ARMcc_AL);
1460    return i;
1461 }
1462 
ARMInstr_NShift(ARMNeonShiftOp op,HReg dst,HReg argL,HReg argR,UInt size,Bool Q)1463 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1464                             HReg dst, HReg argL, HReg argR,
1465                             UInt size, Bool Q ) {
1466    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1467    i->tag                = ARMin_NShift;
1468    i->ARMin.NShift.op   = op;
1469    i->ARMin.NShift.argL = argL;
1470    i->ARMin.NShift.argR = argR;
1471    i->ARMin.NShift.dst  = dst;
1472    i->ARMin.NShift.size = size;
1473    i->ARMin.NShift.Q    = Q;
1474    return i;
1475 }
1476 
ARMInstr_NShl64(HReg dst,HReg src,UInt amt)1477 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1478 {
1479    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1480    i->tag              = ARMin_NShl64;
1481    i->ARMin.NShl64.dst = dst;
1482    i->ARMin.NShl64.src = src;
1483    i->ARMin.NShl64.amt = amt;
1484    vassert(amt >= 1 && amt <= 63);
1485    return i;
1486 }
1487 
1488 /* Helper copy-pasted from isel.c */
fitsIn8x4(UInt * u8,UInt * u4,UInt u)1489 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1490 {
1491    UInt i;
1492    for (i = 0; i < 16; i++) {
1493       if (0 == (u & 0xFFFFFF00)) {
1494          *u8 = u;
1495          *u4 = i;
1496          return True;
1497       }
1498       u = ROR32(u, 30);
1499    }
1500    vassert(i == 16);
1501    return False;
1502 }
1503 
ARMInstr_Add32(HReg rD,HReg rN,UInt imm32)1504 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1505    UInt u8, u4;
1506    ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1507    /* Try to generate single ADD if possible */
1508    if (fitsIn8x4(&u8, &u4, imm32)) {
1509       i->tag            = ARMin_Alu;
1510       i->ARMin.Alu.op   = ARMalu_ADD;
1511       i->ARMin.Alu.dst  = rD;
1512       i->ARMin.Alu.argL = rN;
1513       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1514    } else {
1515       i->tag               = ARMin_Add32;
1516       i->ARMin.Add32.rD    = rD;
1517       i->ARMin.Add32.rN    = rN;
1518       i->ARMin.Add32.imm32 = imm32;
1519    }
1520    return i;
1521 }
1522 
ARMInstr_EvCheck(ARMAMode1 * amCounter,ARMAMode1 * amFailAddr)1523 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1524                              ARMAMode1* amFailAddr ) {
1525    ARMInstr* i                 = LibVEX_Alloc_inline(sizeof(ARMInstr));
1526    i->tag                      = ARMin_EvCheck;
1527    i->ARMin.EvCheck.amCounter  = amCounter;
1528    i->ARMin.EvCheck.amFailAddr = amFailAddr;
1529    return i;
1530 }
1531 
ARMInstr_ProfInc(void)1532 ARMInstr* ARMInstr_ProfInc ( void ) {
1533    ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1534    i->tag      = ARMin_ProfInc;
1535    return i;
1536 }
1537 
1538 /* ... */
1539 
ppARMInstr(const ARMInstr * i)1540 void ppARMInstr ( const ARMInstr* i ) {
1541    switch (i->tag) {
1542       case ARMin_Alu:
1543          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
1544          ppHRegARM(i->ARMin.Alu.dst);
1545          vex_printf(", ");
1546          ppHRegARM(i->ARMin.Alu.argL);
1547          vex_printf(", ");
1548          ppARMRI84(i->ARMin.Alu.argR);
1549          return;
1550       case ARMin_Shift:
1551          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
1552          ppHRegARM(i->ARMin.Shift.dst);
1553          vex_printf(", ");
1554          ppHRegARM(i->ARMin.Shift.argL);
1555          vex_printf(", ");
1556          ppARMRI5(i->ARMin.Shift.argR);
1557          return;
1558       case ARMin_Unary:
1559          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
1560          ppHRegARM(i->ARMin.Unary.dst);
1561          vex_printf(", ");
1562          ppHRegARM(i->ARMin.Unary.src);
1563          return;
1564       case ARMin_CmpOrTst:
1565          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1566          ppHRegARM(i->ARMin.CmpOrTst.argL);
1567          vex_printf(", ");
1568          ppARMRI84(i->ARMin.CmpOrTst.argR);
1569          return;
1570       case ARMin_Mov:
1571          vex_printf("mov   ");
1572          ppHRegARM(i->ARMin.Mov.dst);
1573          vex_printf(", ");
1574          ppARMRI84(i->ARMin.Mov.src);
1575          return;
1576       case ARMin_Imm32:
1577          vex_printf("imm   ");
1578          ppHRegARM(i->ARMin.Imm32.dst);
1579          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1580          return;
1581       case ARMin_LdSt32:
1582          if (i->ARMin.LdSt32.isLoad) {
1583             vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1584                                     : showARMCondCode(i->ARMin.LdSt32.cc));
1585             ppHRegARM(i->ARMin.LdSt32.rD);
1586             vex_printf(", ");
1587             ppARMAMode1(i->ARMin.LdSt32.amode);
1588          } else {
1589             vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1590                                     : showARMCondCode(i->ARMin.LdSt32.cc));
1591             ppARMAMode1(i->ARMin.LdSt32.amode);
1592             vex_printf(", ");
1593             ppHRegARM(i->ARMin.LdSt32.rD);
1594          }
1595          return;
1596       case ARMin_LdSt16:
1597          if (i->ARMin.LdSt16.isLoad) {
1598             vex_printf("%s%s%s",
1599                        i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1600                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1601                           : showARMCondCode(i->ARMin.LdSt16.cc),
1602                        i->ARMin.LdSt16.signedLoad ? " " : "  ");
1603             ppHRegARM(i->ARMin.LdSt16.rD);
1604             vex_printf(", ");
1605             ppARMAMode2(i->ARMin.LdSt16.amode);
1606          } else {
1607             vex_printf("strh%s  ",
1608                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1609                           : showARMCondCode(i->ARMin.LdSt16.cc));
1610             ppARMAMode2(i->ARMin.LdSt16.amode);
1611             vex_printf(", ");
1612             ppHRegARM(i->ARMin.LdSt16.rD);
1613          }
1614          return;
1615       case ARMin_LdSt8U:
1616          if (i->ARMin.LdSt8U.isLoad) {
1617             vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1618                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
1619             ppHRegARM(i->ARMin.LdSt8U.rD);
1620             vex_printf(", ");
1621             ppARMAMode1(i->ARMin.LdSt8U.amode);
1622          } else {
1623             vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1624                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
1625             ppARMAMode1(i->ARMin.LdSt8U.amode);
1626             vex_printf(", ");
1627             ppHRegARM(i->ARMin.LdSt8U.rD);
1628          }
1629          return;
1630       case ARMin_Ld8S:
1631          vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
1632                                    : showARMCondCode(i->ARMin.Ld8S.cc));
1633          ppARMAMode2(i->ARMin.Ld8S.amode);
1634          vex_printf(", ");
1635          ppHRegARM(i->ARMin.Ld8S.rD);
1636          return;
1637       case ARMin_XDirect:
1638          vex_printf("(xDirect) ");
1639          vex_printf("if (%%cpsr.%s) { ",
1640                     showARMCondCode(i->ARMin.XDirect.cond));
1641          vex_printf("movw r12,0x%x; ",
1642                     (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1643          vex_printf("movt r12,0x%x; ",
1644                     (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1645          vex_printf("str r12,");
1646          ppARMAMode1(i->ARMin.XDirect.amR15T);
1647          vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1648                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1649          vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1650                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1651          vex_printf("blx r12 }");
1652          return;
1653       case ARMin_XIndir:
1654          vex_printf("(xIndir) ");
1655          vex_printf("if (%%cpsr.%s) { ",
1656                     showARMCondCode(i->ARMin.XIndir.cond));
1657          vex_printf("str ");
1658          ppHRegARM(i->ARMin.XIndir.dstGA);
1659          vex_printf(",");
1660          ppARMAMode1(i->ARMin.XIndir.amR15T);
1661          vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1662          vex_printf("movt r12,HI16($disp_cp_xindir); ");
1663          vex_printf("blx r12 }");
1664          return;
1665       case ARMin_XAssisted:
1666          vex_printf("(xAssisted) ");
1667          vex_printf("if (%%cpsr.%s) { ",
1668                     showARMCondCode(i->ARMin.XAssisted.cond));
1669          vex_printf("str ");
1670          ppHRegARM(i->ARMin.XAssisted.dstGA);
1671          vex_printf(",");
1672          ppARMAMode1(i->ARMin.XAssisted.amR15T);
1673          vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1674                     (Int)i->ARMin.XAssisted.jk);
1675          vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1676          vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1677          vex_printf("blx r12 }");
1678          return;
1679       case ARMin_CMov:
1680          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1681          ppHRegARM(i->ARMin.CMov.dst);
1682          vex_printf(", ");
1683          ppARMRI84(i->ARMin.CMov.src);
1684          return;
1685       case ARMin_Call:
1686          vex_printf("call%s  ",
1687                     i->ARMin.Call.cond==ARMcc_AL
1688                        ? "" : showARMCondCode(i->ARMin.Call.cond));
1689          vex_printf("0x%x [nArgRegs=%d, ",
1690                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1691          ppRetLoc(i->ARMin.Call.rloc);
1692          vex_printf("]");
1693          return;
1694       case ARMin_Mul:
1695          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1696          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1697             vex_printf("r0, r2, r3");
1698          } else {
1699             vex_printf("r1:r0, r2, r3");
1700          }
1701          return;
1702       case ARMin_LdrEX: {
1703          const HChar* sz = "";
1704          switch (i->ARMin.LdrEX.szB) {
1705             case 1: sz = "b"; break; case 2: sz = "h"; break;
1706             case 8: sz = "d"; break; case 4: break;
1707             default: vassert(0);
1708          }
1709          vex_printf("ldrex%s %sr2, [r4]",
1710                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1711          return;
1712       }
1713       case ARMin_StrEX: {
1714          const HChar* sz = "";
1715          switch (i->ARMin.StrEX.szB) {
1716             case 1: sz = "b"; break; case 2: sz = "h"; break;
1717             case 8: sz = "d"; break; case 4: break;
1718             default: vassert(0);
1719          }
1720          vex_printf("strex%s r0, %sr2, [r4]",
1721                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1722          return;
1723       }
1724       case ARMin_VLdStD:
1725          if (i->ARMin.VLdStD.isLoad) {
1726             vex_printf("fldd  ");
1727             ppHRegARM(i->ARMin.VLdStD.dD);
1728             vex_printf(", ");
1729             ppARMAModeV(i->ARMin.VLdStD.amode);
1730          } else {
1731             vex_printf("fstd  ");
1732             ppARMAModeV(i->ARMin.VLdStD.amode);
1733             vex_printf(", ");
1734             ppHRegARM(i->ARMin.VLdStD.dD);
1735          }
1736          return;
1737       case ARMin_VLdStS:
1738          if (i->ARMin.VLdStS.isLoad) {
1739             vex_printf("flds  ");
1740             ppHRegARM(i->ARMin.VLdStS.fD);
1741             vex_printf(", ");
1742             ppARMAModeV(i->ARMin.VLdStS.amode);
1743          } else {
1744             vex_printf("fsts  ");
1745             ppARMAModeV(i->ARMin.VLdStS.amode);
1746             vex_printf(", ");
1747             ppHRegARM(i->ARMin.VLdStS.fD);
1748          }
1749          return;
1750       case ARMin_VAluD:
1751          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1752          ppHRegARM(i->ARMin.VAluD.dst);
1753          vex_printf(", ");
1754          ppHRegARM(i->ARMin.VAluD.argL);
1755          vex_printf(", ");
1756          ppHRegARM(i->ARMin.VAluD.argR);
1757          return;
1758       case ARMin_VAluS:
1759          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1760          ppHRegARM(i->ARMin.VAluS.dst);
1761          vex_printf(", ");
1762          ppHRegARM(i->ARMin.VAluS.argL);
1763          vex_printf(", ");
1764          ppHRegARM(i->ARMin.VAluS.argR);
1765          return;
1766       case ARMin_VUnaryD:
1767          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1768          ppHRegARM(i->ARMin.VUnaryD.dst);
1769          vex_printf(", ");
1770          ppHRegARM(i->ARMin.VUnaryD.src);
1771          return;
1772       case ARMin_VUnaryS:
1773          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1774          ppHRegARM(i->ARMin.VUnaryS.dst);
1775          vex_printf(", ");
1776          ppHRegARM(i->ARMin.VUnaryS.src);
1777          return;
1778       case ARMin_VCmpD:
1779          vex_printf("fcmpd ");
1780          ppHRegARM(i->ARMin.VCmpD.argL);
1781          vex_printf(", ");
1782          ppHRegARM(i->ARMin.VCmpD.argR);
1783          vex_printf(" ; fmstat");
1784          return;
1785       case ARMin_VCMovD:
1786          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1787          ppHRegARM(i->ARMin.VCMovD.dst);
1788          vex_printf(", ");
1789          ppHRegARM(i->ARMin.VCMovD.src);
1790          return;
1791       case ARMin_VCMovS:
1792          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1793          ppHRegARM(i->ARMin.VCMovS.dst);
1794          vex_printf(", ");
1795          ppHRegARM(i->ARMin.VCMovS.src);
1796          return;
1797       case ARMin_VCvtSD:
1798          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1799          ppHRegARM(i->ARMin.VCvtSD.dst);
1800          vex_printf(", ");
1801          ppHRegARM(i->ARMin.VCvtSD.src);
1802          return;
1803       case ARMin_VXferD:
1804          vex_printf("vmov  ");
1805          if (i->ARMin.VXferD.toD) {
1806             ppHRegARM(i->ARMin.VXferD.dD);
1807             vex_printf(", ");
1808             ppHRegARM(i->ARMin.VXferD.rLo);
1809             vex_printf(", ");
1810             ppHRegARM(i->ARMin.VXferD.rHi);
1811          } else {
1812             ppHRegARM(i->ARMin.VXferD.rLo);
1813             vex_printf(", ");
1814             ppHRegARM(i->ARMin.VXferD.rHi);
1815             vex_printf(", ");
1816             ppHRegARM(i->ARMin.VXferD.dD);
1817          }
1818          return;
1819       case ARMin_VXferS:
1820          vex_printf("vmov  ");
1821          if (i->ARMin.VXferS.toS) {
1822             ppHRegARM(i->ARMin.VXferS.fD);
1823             vex_printf(", ");
1824             ppHRegARM(i->ARMin.VXferS.rLo);
1825          } else {
1826             ppHRegARM(i->ARMin.VXferS.rLo);
1827             vex_printf(", ");
1828             ppHRegARM(i->ARMin.VXferS.fD);
1829          }
1830          return;
1831       case ARMin_VCvtID: {
1832          const HChar* nm = "?";
1833          if (i->ARMin.VCvtID.iToD) {
1834             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1835          } else {
1836             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1837          }
1838          vex_printf("%s ", nm);
1839          ppHRegARM(i->ARMin.VCvtID.dst);
1840          vex_printf(", ");
1841          ppHRegARM(i->ARMin.VCvtID.src);
1842          return;
1843       }
1844       case ARMin_FPSCR:
1845          if (i->ARMin.FPSCR.toFPSCR) {
1846             vex_printf("fmxr  fpscr, ");
1847             ppHRegARM(i->ARMin.FPSCR.iReg);
1848          } else {
1849             vex_printf("fmrx  ");
1850             ppHRegARM(i->ARMin.FPSCR.iReg);
1851             vex_printf(", fpscr");
1852          }
1853          return;
1854       case ARMin_MFence:
1855          vex_printf("(mfence) dsb sy; dmb sy; isb");
1856          return;
1857       case ARMin_CLREX:
1858          vex_printf("clrex");
1859          return;
1860       case ARMin_NLdStQ:
1861          if (i->ARMin.NLdStQ.isLoad)
1862             vex_printf("vld1.32 {");
1863          else
1864             vex_printf("vst1.32 {");
1865          ppHRegARM(i->ARMin.NLdStQ.dQ);
1866          vex_printf("} ");
1867          ppARMAModeN(i->ARMin.NLdStQ.amode);
1868          return;
1869       case ARMin_NLdStD:
1870          if (i->ARMin.NLdStD.isLoad)
1871             vex_printf("vld1.32 {");
1872          else
1873             vex_printf("vst1.32 {");
1874          ppHRegARM(i->ARMin.NLdStD.dD);
1875          vex_printf("} ");
1876          ppARMAModeN(i->ARMin.NLdStD.amode);
1877          return;
1878       case ARMin_NUnary:
1879          vex_printf("%s%s%s  ",
1880                     showARMNeonUnOp(i->ARMin.NUnary.op),
1881                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1882                     showARMNeonDataSize(i));
1883          ppHRegARM(i->ARMin.NUnary.dst);
1884          vex_printf(", ");
1885          ppHRegARM(i->ARMin.NUnary.src);
1886          if (i->ARMin.NUnary.op == ARMneon_EQZ)
1887             vex_printf(", #0");
1888          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1889              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1890              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1891              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1892             vex_printf(", #%u", i->ARMin.NUnary.size);
1893          }
1894          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1895              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1896              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1897             UInt size;
1898             size = i->ARMin.NUnary.size;
1899             if (size & 0x40) {
1900                vex_printf(", #%u", size - 64);
1901             } else if (size & 0x20) {
1902                vex_printf(", #%u", size - 32);
1903             } else if (size & 0x10) {
1904                vex_printf(", #%u", size - 16);
1905             } else if (size & 0x08) {
1906                vex_printf(", #%u", size - 8);
1907             }
1908          }
1909          return;
1910       case ARMin_NUnaryS:
1911          vex_printf("%s%s%s  ",
1912                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1913                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1914                     showARMNeonDataSize(i));
1915          ppARMNRS(i->ARMin.NUnaryS.dst);
1916          vex_printf(", ");
1917          ppARMNRS(i->ARMin.NUnaryS.src);
1918          return;
1919       case ARMin_NShift:
1920          vex_printf("%s%s%s  ",
1921                     showARMNeonShiftOp(i->ARMin.NShift.op),
1922                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1923                     showARMNeonDataSize(i));
1924          ppHRegARM(i->ARMin.NShift.dst);
1925          vex_printf(", ");
1926          ppHRegARM(i->ARMin.NShift.argL);
1927          vex_printf(", ");
1928          ppHRegARM(i->ARMin.NShift.argR);
1929          return;
1930       case ARMin_NShl64:
1931          vex_printf("vshl.i64 ");
1932          ppHRegARM(i->ARMin.NShl64.dst);
1933          vex_printf(", ");
1934          ppHRegARM(i->ARMin.NShl64.src);
1935          vex_printf(", #%u", i->ARMin.NShl64.amt);
1936          return;
1937       case ARMin_NDual:
1938          vex_printf("%s%s%s  ",
1939                     showARMNeonDualOp(i->ARMin.NDual.op),
1940                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
1941                     showARMNeonDataSize(i));
1942          ppHRegARM(i->ARMin.NDual.arg1);
1943          vex_printf(", ");
1944          ppHRegARM(i->ARMin.NDual.arg2);
1945          return;
1946       case ARMin_NBinary:
1947          vex_printf("%s%s%s",
1948                     showARMNeonBinOp(i->ARMin.NBinary.op),
1949                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1950                     showARMNeonDataSize(i));
1951          vex_printf("  ");
1952          ppHRegARM(i->ARMin.NBinary.dst);
1953          vex_printf(", ");
1954          ppHRegARM(i->ARMin.NBinary.argL);
1955          vex_printf(", ");
1956          ppHRegARM(i->ARMin.NBinary.argR);
1957          return;
1958       case ARMin_NeonImm:
1959          vex_printf("vmov  ");
1960          ppHRegARM(i->ARMin.NeonImm.dst);
1961          vex_printf(", ");
1962          ppARMNImm(i->ARMin.NeonImm.imm);
1963          return;
1964       case ARMin_NCMovQ:
1965          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1966          ppHRegARM(i->ARMin.NCMovQ.dst);
1967          vex_printf(", ");
1968          ppHRegARM(i->ARMin.NCMovQ.src);
1969          return;
1970       case ARMin_Add32:
1971          vex_printf("add32 ");
1972          ppHRegARM(i->ARMin.Add32.rD);
1973          vex_printf(", ");
1974          ppHRegARM(i->ARMin.Add32.rN);
1975          vex_printf(", ");
1976          vex_printf("%u", i->ARMin.Add32.imm32);
1977          return;
1978       case ARMin_EvCheck:
1979          vex_printf("(evCheck) ldr r12,");
1980          ppARMAMode1(i->ARMin.EvCheck.amCounter);
1981          vex_printf("; subs r12,r12,$1; str r12,");
1982          ppARMAMode1(i->ARMin.EvCheck.amCounter);
1983          vex_printf("; bpl nofail; ldr r12,");
1984          ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
1985          vex_printf("; bx r12; nofail:");
1986          return;
1987       case ARMin_ProfInc:
1988          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
1989                     "movw r12,HI16($NotKnownYet); "
1990                     "ldr r11,[r12]; "
1991                     "adds r11,r11,$1; "
1992                     "str r11,[r12]; "
1993                     "ldr r11,[r12+4]; "
1994                     "adc r11,r11,$0; "
1995                     "str r11,[r12+4]");
1996          return;
1997       default:
1998          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
1999          vpanic("ppARMInstr(1)");
2000          return;
2001    }
2002 }
2003 
2004 
2005 /* --------- Helpers for register allocation. --------- */
2006 
/* Describe, for the benefit of the register allocator, which registers
   instruction |i| reads and which it writes.  |u| is (re)initialised
   with initHRegUsage and then filled in one entry at a time.  |mode64|
   is asserted to be False.  An unrecognised instruction tag is printed
   with ppARMInstr and then causes a panic. */
void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);
   initHRegUsage(u);

   switch (i->tag) {

      /* ---------------- Integer data processing ---------------- */

      case ARMin_Alu:
         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
         addHRegUse(u, HRmRead,  i->ARMin.Alu.argL);
         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
         break;

      case ARMin_Shift:
         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
         addHRegUse(u, HRmRead,  i->ARMin.Shift.argL);
         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
         break;

      case ARMin_Unary:
         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
         addHRegUse(u, HRmRead,  i->ARMin.Unary.src);
         break;

      case ARMin_CmpOrTst:
         /* No destination register is recorded for a compare/test. */
         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
         break;

      case ARMin_Mov:
         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
         break;

      case ARMin_Imm32:
         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
         break;

      /* ---------------- Integer loads and stores ---------------- */

      /* For the (possibly conditional) loads below, rD is written,
         and is additionally marked as read when the condition code is
         not ARMcc_AL -- presumably because a load whose condition
         fails leaves the previous value of rD in place.  For stores,
         rD is only read. */

      case ARMin_LdSt32:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
         if (!i->ARMin.LdSt32.isLoad) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
            break;
         }
         addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
         if (i->ARMin.LdSt32.cc != ARMcc_AL) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         }
         break;

      case ARMin_LdSt16:
         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
         if (!i->ARMin.LdSt16.isLoad) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
            break;
         }
         addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
         if (i->ARMin.LdSt16.cc != ARMcc_AL) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         }
         break;

      case ARMin_LdSt8U:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
         if (!i->ARMin.LdSt8U.isLoad) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
            break;
         }
         addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
         if (i->ARMin.LdSt8U.cc != ARMcc_AL) {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         }
         break;

      case ARMin_Ld8S:
         /* Load only; there is no store form of this instruction. */
         addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
         addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
         if (i->ARMin.Ld8S.cc != ARMcc_AL) {
            addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
         }
         break;

      /* ---------------- Block exits ---------------- */

      /* XDirect/XIndir/XAssisted exit the block conditionally.  So
         what has to be listed is (1) the registers they read, and (2)
         the registers they write on the path where the block is NOT
         exited.  Set (2) is empty, which leaves only (1). */

      case ARMin_XDirect:
         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
         break;

      case ARMin_XIndir:
         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
         break;

      case ARMin_XAssisted:
         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
         break;

      /* ---------------- Conditional move, call, multiply -------- */

      case ARMin_CMov:
         /* dst is listed both as written and as read. */
         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
         break;

      case ARMin_Call:
         /* Logic and comments adapted from the x86 back end; this
            one needs some care. */

         /* Step 1: the call trashes every caller-saved register that
            the allocator is in charge of.  Those are believed to be
            r0, r1, r2 and r3.  Should r9 turn out to be caller-saved
            too, it needs to be added here. */
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmWrite, hregARM_R1());
         addHRegUse(u, HRmWrite, hregARM_R2());
         addHRegUse(u, HRmWrite, hregARM_R3());

         /* Step 2: the registers carrying arguments may be read.
            Which ones depends on nArgRegs; each case deliberately
            falls through to the one below it. */
         switch (i->ARMin.Call.nArgRegs) {
            case 4:
               addHRegUse(u, HRmRead, hregARM_R3());
               /*fallthru*/
            case 3:
               addHRegUse(u, HRmRead, hregARM_R2());
               /*fallthru*/
            case 2:
               addHRegUse(u, HRmRead, hregARM_R1());
               /*fallthru*/
            case 1:
               addHRegUse(u, HRmRead, hregARM_R0());
               break;
            case 0:
               break;
            default:
               vpanic("getRegUsage_ARM:Call:regparms");
         }

         /* Step 3: the literal target address has to be loaded into
            some register, which is thereby trashed.  With nArgRegs
            of 0/1/2/3 that temporary can be r0/r1/r2/r3 respectively,
            which are already marked as written above.  With
            nArgRegs == 4 all caller-saved registers hold parameters,
            so another register is picked arbitrarily: r11. */
         if (i->ARMin.Call.nArgRegs == 4) {
            addHRegUse(u, HRmWrite, hregARM_R11());
         }

         /* Consequence: the assembler really must follow the
            convention stated here for the address temporary, as a
            function of nArgRegs: 0==r0, 1==r1, 2==r2, 3==r3,
            4==r11. */
         break;

      case ARMin_Mul:
         /* Fixed registers: sources r2 and r3, result in r0, plus r1
            for every op other than ARMmul_PLAIN. */
         addHRegUse(u, HRmRead,  hregARM_R2());
         addHRegUse(u, HRmRead,  hregARM_R3());
         addHRegUse(u, HRmWrite, hregARM_R0());
         if (i->ARMin.Mul.op != ARMmul_PLAIN) {
            addHRegUse(u, HRmWrite, hregARM_R1());
         }
         break;

      /* ---------------- Exclusive loads and stores -------------- */

      case ARMin_LdrEX:
         /* Fixed registers: r4 is read, r2 is written, and r3 is
            written as well for the 8-byte form. */
         addHRegUse(u, HRmRead,  hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R2());
         if (i->ARMin.LdrEX.szB == 8) {
            addHRegUse(u, HRmWrite, hregARM_R3());
         }
         break;

      case ARMin_StrEX:
         /* Fixed registers: r4 and r2 are read (and r3 for the 8-byte
            form), r0 is written. */
         addHRegUse(u, HRmRead,  hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmRead,  hregARM_R2());
         if (i->ARMin.StrEX.szB == 8) {
            addHRegUse(u, HRmRead, hregARM_R3());
         }
         break;

      /* ---------------- VFP ---------------- */

      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
         /* A load writes dD, a store reads it. */
         addHRegUse(u, i->ARMin.VLdStD.isLoad ? HRmWrite : HRmRead,
                       i->ARMin.VLdStD.dD);
         break;

      case ARMin_VLdStS:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
         /* A load writes fD, a store reads it. */
         addHRegUse(u, i->ARMin.VLdStS.isLoad ? HRmWrite : HRmRead,
                       i->ARMin.VLdStS.fD);
         break;

      case ARMin_VAluD:
         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VAluD.argL);
         addHRegUse(u, HRmRead,  i->ARMin.VAluD.argR);
         break;

      case ARMin_VAluS:
         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VAluS.argL);
         addHRegUse(u, HRmRead,  i->ARMin.VAluS.argR);
         break;

      case ARMin_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VUnaryD.src);
         break;

      case ARMin_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VUnaryS.src);
         break;

      case ARMin_VCmpD:
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
         break;

      case ARMin_VCMovD:
         /* dst is listed both as written and as read. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
         break;

      case ARMin_VCMovS:
         /* dst is listed both as written and as read. */
         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
         break;

      case ARMin_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
         break;

      case ARMin_VXferD: {
         /* Either the D register is written from the rHi:rLo pair,
            or the pair is written from the D register. */
         const Bool toD = i->ARMin.VXferD.toD;
         addHRegUse(u, toD ? HRmWrite : HRmRead,  i->ARMin.VXferD.dD);
         addHRegUse(u, toD ? HRmRead  : HRmWrite, i->ARMin.VXferD.rHi);
         addHRegUse(u, toD ? HRmRead  : HRmWrite, i->ARMin.VXferD.rLo);
         break;
      }

      case ARMin_VXferS: {
         /* Same idea as VXferD, with a single register on each side. */
         const Bool toS = i->ARMin.VXferS.toS;
         addHRegUse(u, toS ? HRmWrite : HRmRead,  i->ARMin.VXferS.fD);
         addHRegUse(u, toS ? HRmRead  : HRmWrite, i->ARMin.VXferS.rLo);
         break;
      }

      case ARMin_VCvtID:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
         break;

      case ARMin_FPSCR:
         /* iReg is read when moving to FPSCR, written otherwise. */
         addHRegUse(u, i->ARMin.FPSCR.toFPSCR ? HRmRead : HRmWrite,
                       i->ARMin.FPSCR.iReg);
         break;

      /* ---------------- No register operands ---------------- */

      case ARMin_MFence:
      case ARMin_CLREX:
         break;

      /* ---------------- NEON ---------------- */

      case ARMin_NLdStQ:
         /* A load writes dQ, a store reads it. */
         addHRegUse(u, i->ARMin.NLdStQ.isLoad ? HRmWrite : HRmRead,
                       i->ARMin.NLdStQ.dQ);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
         break;

      case ARMin_NLdStD:
         /* A load writes dD, a store reads it. */
         addHRegUse(u, i->ARMin.NLdStD.isLoad ? HRmWrite : HRmRead,
                       i->ARMin.NLdStD.dD);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
         break;

      case ARMin_NUnary:
         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NUnary.src);
         break;

      case ARMin_NUnaryS:
         /* Operands here are pointers to descriptors; the register
            itself is in the ->reg field. */
         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
         addHRegUse(u, HRmRead,  i->ARMin.NUnaryS.src->reg);
         break;

      case ARMin_NShift:
         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NShift.argL);
         addHRegUse(u, HRmRead,  i->ARMin.NShift.argR);
         break;

      case ARMin_NShl64:
         addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NShl64.src);
         break;

      case ARMin_NDual:
         /* Both operands are listed as written and as read. */
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
         addHRegUse(u, HRmRead,  i->ARMin.NDual.arg1);
         addHRegUse(u, HRmRead,  i->ARMin.NDual.arg2);
         break;

      case ARMin_NBinary:
         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
         /* TODO: for some operations dst is read as well, which is
            not recorded here. */
         // XXX fix this
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
         break;

      case ARMin_NeonImm:
         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
         break;

      case ARMin_NCMovQ:
         /* dst is listed both as written and as read. */
         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
         break;

      /* ---------------- Miscellaneous ---------------- */

      case ARMin_Add32:
         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
         addHRegUse(u, HRmRead,  i->ARMin.Add32.rN);
         break;

      case ARMin_EvCheck:
         /* Both amodes are expected to mention only r8.  Since r8 is
            not allocatable, recording them achieves nothing in
            practice, but do it anyway. */
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
         /* r12 is likewise unavailable to the allocator. */
         addHRegUse(u, HRmWrite, hregARM_R12());
         break;

      case ARMin_ProfInc:
         addHRegUse(u, HRmWrite, hregARM_R12());
         addHRegUse(u, HRmWrite, hregARM_R11());
         break;

      default:
         /* Unknown tag: show the offending instruction, then give up. */
         ppARMInstr(i);
         vpanic("getRegUsage_ARMInstr");
   }
}
2311 
2312 
mapRegs_ARMInstr(HRegRemap * m,ARMInstr * i,Bool mode64)2313 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2314 {
2315    vassert(mode64 == False);
2316    switch (i->tag) {
2317       case ARMin_Alu:
2318          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2319          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2320          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2321          return;
2322       case ARMin_Shift:
2323          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2324          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2325          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2326          return;
2327       case ARMin_Unary:
2328          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2329          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2330          return;
2331       case ARMin_CmpOrTst:
2332          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2333          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2334          return;
2335       case ARMin_Mov:
2336          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2337          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2338          return;
2339       case ARMin_Imm32:
2340          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2341          return;
2342       case ARMin_LdSt32:
2343          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2344          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2345          return;
2346       case ARMin_LdSt16:
2347          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2348          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2349          return;
2350       case ARMin_LdSt8U:
2351          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2352          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2353          return;
2354       case ARMin_Ld8S:
2355          i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2356          mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2357          return;
2358       case ARMin_XDirect:
2359          mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2360          return;
2361       case ARMin_XIndir:
2362          i->ARMin.XIndir.dstGA
2363             = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2364          mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2365          return;
2366       case ARMin_XAssisted:
2367          i->ARMin.XAssisted.dstGA
2368             = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2369          mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2370          return;
2371       case ARMin_CMov:
2372          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2373          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2374          return;
2375       case ARMin_Call:
2376          return;
2377       case ARMin_Mul:
2378          return;
2379       case ARMin_LdrEX:
2380          return;
2381       case ARMin_StrEX:
2382          return;
2383       case ARMin_VLdStD:
2384          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2385          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2386          return;
2387       case ARMin_VLdStS:
2388          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2389          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2390          return;
2391       case ARMin_VAluD:
2392          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2393          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2394          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2395          return;
2396       case ARMin_VAluS:
2397          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2398          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2399          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2400          return;
2401       case ARMin_VUnaryD:
2402          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2403          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2404          return;
2405       case ARMin_VUnaryS:
2406          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2407          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2408          return;
2409       case ARMin_VCmpD:
2410          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2411          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2412          return;
2413       case ARMin_VCMovD:
2414          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2415          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2416          return;
2417       case ARMin_VCMovS:
2418          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2419          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2420          return;
2421       case ARMin_VCvtSD:
2422          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2423          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2424          return;
2425       case ARMin_VXferD:
2426          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2427          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2428          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2429          return;
2430       case ARMin_VXferS:
2431          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2432          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2433          return;
2434       case ARMin_VCvtID:
2435          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2436          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2437          return;
2438       case ARMin_FPSCR:
2439          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2440          return;
2441       case ARMin_MFence:
2442          return;
2443       case ARMin_CLREX:
2444          return;
2445       case ARMin_NLdStQ:
2446          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2447          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2448          return;
2449       case ARMin_NLdStD:
2450          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2451          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2452          return;
2453       case ARMin_NUnary:
2454          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2455          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2456          return;
2457       case ARMin_NUnaryS:
2458          i->ARMin.NUnaryS.src->reg
2459             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2460          i->ARMin.NUnaryS.dst->reg
2461             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2462          return;
2463       case ARMin_NShift:
2464          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2465          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2466          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2467          return;
2468       case ARMin_NShl64:
2469          i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2470          i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2471          return;
2472       case ARMin_NDual:
2473          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2474          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2475          return;
2476       case ARMin_NBinary:
2477          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2478          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2479          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2480          return;
2481       case ARMin_NeonImm:
2482          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2483          return;
2484       case ARMin_NCMovQ:
2485          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2486          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2487          return;
2488       case ARMin_Add32:
2489          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2490          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2491          return;
2492       case ARMin_EvCheck:
2493          /* We expect both amodes only to mention r8, so this is in
2494             fact pointless, since r8 isn't allocatable, but
2495             anyway.. */
2496          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2497          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2498          return;
2499       case ARMin_ProfInc:
2500          /* hardwires r11 and r12 -- nothing to modify. */
2501          return;
2502       default:
2503          ppARMInstr(i);
2504          vpanic("mapRegs_ARMInstr");
2505    }
2506 }
2507 
2508 /* Figure out if i represents a reg-reg move, and if so assign the
2509    source and destination to *src and *dst.  If in doubt say No.  Used
2510    by the register allocator to do move coalescing.
2511 */
isMove_ARMInstr(const ARMInstr * i,HReg * src,HReg * dst)2512 Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
2513 {
2514    /* Moves between integer regs */
2515    switch (i->tag) {
2516       case ARMin_Mov:
2517          if (i->ARMin.Mov.src->tag == ARMri84_R) {
2518             *src = i->ARMin.Mov.src->ARMri84.R.reg;
2519             *dst = i->ARMin.Mov.dst;
2520             return True;
2521          }
2522          break;
2523       case ARMin_VUnaryD:
2524          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2525             *src = i->ARMin.VUnaryD.src;
2526             *dst = i->ARMin.VUnaryD.dst;
2527             return True;
2528          }
2529          break;
2530       case ARMin_VUnaryS:
2531          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2532             *src = i->ARMin.VUnaryS.src;
2533             *dst = i->ARMin.VUnaryS.dst;
2534             return True;
2535          }
2536          break;
2537       case ARMin_NUnary:
2538          if (i->ARMin.NUnary.op == ARMneon_COPY) {
2539             *src = i->ARMin.NUnary.src;
2540             *dst = i->ARMin.NUnary.dst;
2541             return True;
2542          }
2543          break;
2544       default:
2545          break;
2546    }
2547 
2548    return False;
2549 }
2550 
2551 
2552 /* Generate arm spill/reload instructions under the direction of the
2553    register allocator.  Note it's critical these don't write the
2554    condition codes. */
2555 
genSpill_ARM(HInstr ** i1,HInstr ** i2,HReg rreg,Int offsetB,Bool mode64)2556 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2557                     HReg rreg, Int offsetB, Bool mode64 )
2558 {
2559    HRegClass rclass;
2560    vassert(offsetB >= 0);
2561    vassert(!hregIsVirtual(rreg));
2562    vassert(mode64 == False);
2563    *i1 = *i2 = NULL;
2564    rclass = hregClass(rreg);
2565    switch (rclass) {
2566       case HRcInt32:
2567          vassert(offsetB <= 4095);
2568          *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2569                                 rreg,
2570                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
2571          return;
2572       case HRcFlt32:
2573       case HRcFlt64: {
2574          HReg r8   = hregARM_R8();  /* baseblock */
2575          HReg r12  = hregARM_R12(); /* spill temp */
2576          HReg base = r8;
2577          vassert(0 == (offsetB & 3));
2578          if (offsetB >= 1024) {
2579             Int offsetKB = offsetB / 1024;
2580             /* r12 = r8 + (1024 * offsetKB) */
2581             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2582                                ARMRI84_I84(offsetKB, 11));
2583             offsetB -= (1024 * offsetKB);
2584             base = r12;
2585          }
2586          vassert(offsetB <= 1020);
2587          if (rclass == HRcFlt32) {
2588             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2589                                    rreg,
2590                                    mkARMAModeV(base, offsetB) );
2591          } else {
2592             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2593                                    rreg,
2594                                    mkARMAModeV(base, offsetB) );
2595          }
2596          return;
2597       }
2598       case HRcVec128: {
2599          HReg r8  = hregARM_R8();
2600          HReg r12 = hregARM_R12();
2601          *i1 = ARMInstr_Add32(r12, r8, offsetB);
2602          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2603          return;
2604       }
2605       default:
2606          ppHRegClass(rclass);
2607          vpanic("genSpill_ARM: unimplemented regclass");
2608    }
2609 }
2610 
/* Generate ARM reload code under the direction of the register
   allocator: load real register rreg from byte offset offsetB off
   the baseblock register (hregARM_R8()).  At most two instructions
   are produced and handed back through *i1 and *i2; a slot that is
   not needed is left as NULL.  offsetB must be non-negative, rreg
   must not be virtual and mode64 must be False. */
void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i2 = NULL;
   *i1 = NULL;

   const HRegClass cls = hregClass(rreg);

   if (cls == HRcInt32) {
      /* One load straight off the baseblock register. */
      vassert(offsetB <= 4095);
      *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/, rreg,
                             ARMAMode1_RI(hregARM_R8(), offsetB) );
      return;
   }

   if (cls == HRcFlt32 || cls == HRcFlt64) {
      HReg baseblock = hregARM_R8();
      HReg spillTmp  = hregARM_R12();
      HReg addrReg   = baseblock;
      vassert(0 == (offsetB & 3));
      if (offsetB >= 1024) {
         /* Peel off the whole kilobytes first:
            spillTmp = baseblock + (1024 * wholeKB),
            then address relative to spillTmp with what is left. */
         Int wholeKB = offsetB / 1024;
         *i1 = ARMInstr_Alu(ARMalu_ADD, spillTmp, baseblock,
                            ARMRI84_I84(wholeKB, 11));
         offsetB %= 1024;
         addrReg = spillTmp;
      }
      vassert(offsetB <= 1020);
      if (cls == HRcFlt64) {
         *i2 = ARMInstr_VLdStD( True/*isLoad*/, rreg,
                                mkARMAModeV(addrReg, offsetB) );
      } else {
         *i2 = ARMInstr_VLdStS( True/*isLoad*/, rreg,
                                mkARMAModeV(addrReg, offsetB) );
      }
      return;
   }

   if (cls == HRcVec128) {
      /* Form the full address in r12, then load through it. */
      HReg addrTmp = hregARM_R12();
      *i1 = ARMInstr_Add32(addrTmp, hregARM_R8(), offsetB);
      *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(addrTmp));
      return;
   }

   /* No reload sequence known for this register class. */
   ppHRegClass(cls);
   vpanic("genReload_ARM: unimplemented regclass");
}
2665 
2666 
2667 /* Emit an instruction into buf and return the number of bytes used.
2668    Note that buf is not the insn's final place, and therefore it is
2669    imperative to emit position-independent code. */
2670 
iregEnc(HReg r)2671 static inline UInt iregEnc ( HReg r )
2672 {
2673    UInt n;
2674    vassert(hregClass(r) == HRcInt32);
2675    vassert(!hregIsVirtual(r));
2676    n = hregEncoding(r);
2677    vassert(n <= 15);
2678    return n;
2679 }
2680 
dregEnc(HReg r)2681 static inline UInt dregEnc ( HReg r )
2682 {
2683    UInt n;
2684    vassert(hregClass(r) == HRcFlt64);
2685    vassert(!hregIsVirtual(r));
2686    n = hregEncoding(r);
2687    vassert(n <= 31);
2688    return n;
2689 }
2690 
fregEnc(HReg r)2691 static inline UInt fregEnc ( HReg r )
2692 {
2693    UInt n;
2694    vassert(hregClass(r) == HRcFlt32);
2695    vassert(!hregIsVirtual(r));
2696    n = hregEncoding(r);
2697    vassert(n <= 31);
2698    return n;
2699 }
2700 
qregEnc(HReg r)2701 static inline UInt qregEnc ( HReg r )
2702 {
2703    UInt n;
2704    vassert(hregClass(r) == HRcVec128);
2705    vassert(!hregIsVirtual(r));
2706    n = hregEncoding(r);
2707    vassert(n <= 15);
2708    return n;
2709 }
2710 
/* BITS4 assembles a 4-bit value from four single bits, most
   significant bit first.  The Xbbbb constants name each of the
   sixteen nibble values by its binary spelling. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   ((zzb0) | ((zzb1) << 1) | ((zzb2) << 2) | ((zzb3) << 3))
#define X0000  0x0
#define X0001  0x1
#define X0010  0x2
#define X0011  0x3
#define X0100  0x4
#define X0101  0x5
#define X0110  0x6
#define X0111  0x7
#define X1000  0x8
#define X1001  0x9
#define X1010  0xA
#define X1011  0xB
#define X1100  0xC
#define X1101  0xD
#define X1110  0xE
#define X1111  0xF
2729 
/* Nibble-packing helpers for building 32-bit instruction words.  In
   each macro name, reading left to right from bit 31 downwards, an
   'X' is a 4-bit field supplied by the caller and a '_' is a nibble
   left as zero.  The zzxN argument lands in bits (4*N+3)..(4*N).
   Every argument is masked to its low four bits.

   The mask is the unsigned constant 0xFU on purpose: it forces the
   shifts to be done in unsigned arithmetic.  With a plain int mask,
   a top nibble of 8 or more (for example 0xE) shifted left by 28
   does not fit in a signed int, which is undefined behaviour in C. */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx0) & 0xFU) <<  0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) | \
   (((zzx5) & 0xFU) << 20) | (((zzx1) & 0xFU) << 4) | \
   (((zzx0) & 0xFU) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8) |  \
    (((zzx1) & 0xFU) <<  4) | (((zzx0) & 0xFU) <<  0))

#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24))
2758 
2759 /* Generate a skeletal insn that involves an a RI84 shifter operand.
2760    Returns a word which is all zeroes apart from bits 25 and 11..0,
2761    since it is those that encode the shifter operand (at least to the
2762    extent that we care about it.) */
skeletal_RI84(ARMRI84 * ri)2763 static UInt skeletal_RI84 ( ARMRI84* ri )
2764 {
2765    UInt instr;
2766    if (ri->tag == ARMri84_I84) {
2767       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2768       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2769       instr = 1 << 25;
2770       instr |= (ri->ARMri84.I84.imm4 << 8);
2771       instr |= ri->ARMri84.I84.imm8;
2772    } else {
2773       instr = 0 << 25;
2774       instr |= iregEnc(ri->ARMri84.R.reg);
2775    }
2776    return instr;
2777 }
2778 
2779 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
2780    11..7. */
skeletal_RI5(ARMRI5 * ri)2781 static UInt skeletal_RI5 ( ARMRI5* ri )
2782 {
2783    UInt instr;
2784    if (ri->tag == ARMri5_I5) {
2785       UInt imm5 = ri->ARMri5.I5.imm5;
2786       vassert(imm5 >= 1 && imm5 <= 31);
2787       instr = 0 << 4;
2788       instr |= imm5 << 7;
2789    } else {
2790       instr = 1 << 4;
2791       instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2792    }
2793    return instr;
2794 }
2795 
2796 
2797 /* Get an immediate into a register, using only that
2798    register.  (very lame..) */
imm32_to_ireg(UInt * p,Int rD,UInt imm32)2799 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2800 {
2801    UInt instr;
2802    vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2803 #if 0
2804    if (0 == (imm32 & ~0xFF)) {
2805       /* mov with a immediate shifter operand of (0, imm32) (??) */
2806       instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2807       instr |= imm32;
2808       *p++ = instr;
2809    } else {
2810       // this is very bad; causes Dcache pollution
2811       // ldr  rD, [pc]
2812       instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2813       *p++ = instr;
2814       // b .+8
2815       instr = 0xEA000000;
2816       *p++ = instr;
2817       // .word imm32
2818       *p++ = imm32;
2819    }
2820 #else
2821    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2822       /* Generate movw rD, #low16.  Then, if the high 16 are
2823          nonzero, generate movt rD, #high16. */
2824       UInt lo16 = imm32 & 0xFFFF;
2825       UInt hi16 = (imm32 >> 16) & 0xFFFF;
2826       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2827                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2828                        lo16 & 0xF);
2829       *p++ = instr;
2830       if (hi16 != 0) {
2831          instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2832                           (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2833                           hi16 & 0xF);
2834          *p++ = instr;
2835       }
2836    } else {
2837       UInt imm, rot;
2838       UInt op = X1010;
2839       UInt rN = 0;
2840       if ((imm32 & 0xFF) || (imm32 == 0)) {
2841          imm = imm32 & 0xFF;
2842          rot = 0;
2843          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2844          *p++ = instr;
2845          op = X1000;
2846          rN = rD;
2847       }
2848       if (imm32 & 0xFF000000) {
2849          imm = (imm32 >> 24) & 0xFF;
2850          rot = 4;
2851          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2852          *p++ = instr;
2853          op = X1000;
2854          rN = rD;
2855       }
2856       if (imm32 & 0xFF0000) {
2857          imm = (imm32 >> 16) & 0xFF;
2858          rot = 8;
2859          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2860          *p++ = instr;
2861          op = X1000;
2862          rN = rD;
2863       }
2864       if (imm32 & 0xFF00) {
2865          imm = (imm32 >> 8) & 0xFF;
2866          rot = 12;
2867          instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2868          *p++ = instr;
2869          op = X1000;
2870          rN = rD;
2871       }
2872    }
2873 #endif
2874    return p;
2875 }
2876 
2877 /* Get an immediate into a register, using only that register, and
2878    generating exactly 2 instructions, regardless of the value of the
2879    immediate. This is used when generating sections of code that need
2880    to be patched later, so as to guarantee a specific size. */
imm32_to_ireg_EXACTLY2(UInt * p,Int rD,UInt imm32)2881 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2882 {
2883    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2884       /* Generate movw rD, #low16 ;  movt rD, #high16. */
2885       UInt lo16 = imm32 & 0xFFFF;
2886       UInt hi16 = (imm32 >> 16) & 0xFFFF;
2887       UInt instr;
2888       instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2889                        (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2890                        lo16 & 0xF);
2891       *p++ = instr;
2892       instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2893                        (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2894                        hi16 & 0xF);
2895       *p++ = instr;
2896    } else {
2897       vassert(0); /* lose */
2898    }
2899    return p;
2900 }
2901 
2902 /* Check whether p points at a 2-insn sequence cooked up by
2903    imm32_to_ireg_EXACTLY2(). */
is_imm32_to_ireg_EXACTLY2(UInt * p,Int rD,UInt imm32)2904 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2905 {
2906    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2907       /* Generate movw rD, #low16 ;  movt rD, #high16. */
2908       UInt lo16 = imm32 & 0xFFFF;
2909       UInt hi16 = (imm32 >> 16) & 0xFFFF;
2910       UInt i0, i1;
2911       i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2912                     (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2913                     lo16 & 0xF);
2914       i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2915                     (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2916                     hi16 & 0xF);
2917       return p[0] == i0 && p[1] == i1;
2918    } else {
2919       vassert(0); /* lose */
2920    }
2921 }
2922 
2923 
/* Emit at p one 32-bit load (isLoad true) or store (isLoad false) of
   register number rD through addressing mode am, and return the
   advanced pointer.  The condition nibble is hardwired to X1110.
   Only the register-plus-immediate form of am is handled, and rD
   must be at most 12.  The signed offset in am is split into a
   direction bit and a magnitude of at most 4095. */
static UInt* do_load_or_store32 ( UInt* p,
                                  Bool isLoad, UInt rD, ARMAMode1* am )
{
   vassert(rD <= 12);
   vassert(am->tag == ARMam1_RI); // RR case is not handled

   /* Direction bit is 1 for a non-negative offset, 0 for a negative
      one; the instruction carries only the magnitude. */
   const Int  offset    = am->ARMam1.RI.simm13;
   const UInt bUp       = offset < 0 ? 0 : 1;
   const Int  magnitude = offset < 0 ? -offset : offset;
   vassert(magnitude >= 0 && magnitude <= 4095);

   const UInt bByte = 0;               /* always a 32-bit access */
   const UInt bLoad = isLoad ? 1 : 0;
   UInt word = XXXXX___(X1110,X0101,BITS4(bUp,bByte,0,bLoad),
                        iregEnc(am->ARMam1.RI.reg),
                        rD);
   word |= magnitude;
   *p++ = word;
   return p;
}
2948 
2949 
2950 /* Emit an instruction into buf and return the number of bytes used.
2951    Note that buf is not the insn's final place, and therefore it is
2952    imperative to emit position-independent code.  If the emitted
2953    instruction was a profiler inc, set *is_profInc to True, else
2954    leave it unchanged. */
2955 
emit_ARMInstr(Bool * is_profInc,UChar * buf,Int nbuf,const ARMInstr * i,Bool mode64,VexEndness endness_host,const void * disp_cp_chain_me_to_slowEP,const void * disp_cp_chain_me_to_fastEP,const void * disp_cp_xindir,const void * disp_cp_xassisted)2956 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
2957                     UChar* buf, Int nbuf, const ARMInstr* i,
2958                     Bool mode64, VexEndness endness_host,
2959                     const void* disp_cp_chain_me_to_slowEP,
2960                     const void* disp_cp_chain_me_to_fastEP,
2961                     const void* disp_cp_xindir,
2962                     const void* disp_cp_xassisted )
2963 {
2964    UInt* p = (UInt*)buf;
2965    vassert(nbuf >= 32);
2966    vassert(mode64 == False);
2967    vassert(0 == (((HWord)buf) & 3));
2968 
2969    switch (i->tag) {
2970       case ARMin_Alu: {
2971          UInt     instr, subopc;
2972          UInt     rD   = iregEnc(i->ARMin.Alu.dst);
2973          UInt     rN   = iregEnc(i->ARMin.Alu.argL);
2974          ARMRI84* argR = i->ARMin.Alu.argR;
2975          switch (i->ARMin.Alu.op) {
2976             case ARMalu_ADDS: /* fallthru */
2977             case ARMalu_ADD:  subopc = X0100; break;
2978             case ARMalu_ADC:  subopc = X0101; break;
2979             case ARMalu_SUBS: /* fallthru */
2980             case ARMalu_SUB:  subopc = X0010; break;
2981             case ARMalu_SBC:  subopc = X0110; break;
2982             case ARMalu_AND:  subopc = X0000; break;
2983             case ARMalu_BIC:  subopc = X1110; break;
2984             case ARMalu_OR:   subopc = X1100; break;
2985             case ARMalu_XOR:  subopc = X0001; break;
2986             default: goto bad;
2987          }
2988          instr = skeletal_RI84(argR);
2989          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2990                            (subopc << 1) & 0xF, rN, rD);
2991          if (i->ARMin.Alu.op == ARMalu_ADDS
2992              || i->ARMin.Alu.op == ARMalu_SUBS) {
2993             instr |= 1<<20;  /* set the S bit */
2994          }
2995          *p++ = instr;
2996          goto done;
2997       }
2998       case ARMin_Shift: {
2999          UInt    instr, subopc;
3000          UInt    rD   = iregEnc(i->ARMin.Shift.dst);
3001          UInt    rM   = iregEnc(i->ARMin.Shift.argL);
3002          ARMRI5* argR = i->ARMin.Shift.argR;
3003          switch (i->ARMin.Shift.op) {
3004             case ARMsh_SHL: subopc = X0000; break;
3005             case ARMsh_SHR: subopc = X0001; break;
3006             case ARMsh_SAR: subopc = X0010; break;
3007             default: goto bad;
3008          }
3009          instr = skeletal_RI5(argR);
3010          instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3011          instr |= (subopc & 3) << 5;
3012          *p++ = instr;
3013          goto done;
3014       }
3015       case ARMin_Unary: {
3016          UInt instr;
3017          UInt rDst = iregEnc(i->ARMin.Unary.dst);
3018          UInt rSrc = iregEnc(i->ARMin.Unary.src);
3019          switch (i->ARMin.Unary.op) {
3020             case ARMun_CLZ:
3021                instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3022                                 rDst,X1111,X0001,rSrc);
3023                *p++ = instr;
3024                goto done;
3025             case ARMun_NEG: /* RSB rD,rS,#0 */
3026                instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3027                *p++ = instr;
3028                goto done;
3029             case ARMun_NOT: {
3030                UInt subopc = X1111; /* MVN */
3031                instr = rSrc;
3032                instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3033                                  (subopc << 1) & 0xF, 0, rDst);
3034                *p++ = instr;
3035                goto done;
3036             }
3037             default:
3038                break;
3039          }
3040          goto bad;
3041       }
3042       case ARMin_CmpOrTst: {
3043          UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3044          UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3045          UInt SBZ    = 0;
3046          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3047                            ((subopc << 1) & 0xF) | 1,
3048                            iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3049          *p++ = instr;
3050          goto done;
3051       }
3052       case ARMin_Mov: {
3053          UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
3054          UInt subopc = X1101; /* MOV */
3055          UInt SBZ    = 0;
3056          instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3057                            (subopc << 1) & 0xF, SBZ,
3058                            iregEnc(i->ARMin.Mov.dst));
3059          *p++ = instr;
3060          goto done;
3061       }
3062       case ARMin_Imm32: {
3063          p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3064                                       i->ARMin.Imm32.imm32 );
3065          goto done;
3066       }
3067       case ARMin_LdSt32:
3068       case ARMin_LdSt8U: {
3069          UInt        bL, bB;
3070          HReg        rD;
3071          ARMAMode1*  am;
3072          ARMCondCode cc;
3073          if (i->tag == ARMin_LdSt32) {
3074             bB = 0;
3075             bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3076             am = i->ARMin.LdSt32.amode;
3077             rD = i->ARMin.LdSt32.rD;
3078             cc = i->ARMin.LdSt32.cc;
3079          } else {
3080             bB = 1;
3081             bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3082             am = i->ARMin.LdSt8U.amode;
3083             rD = i->ARMin.LdSt8U.rD;
3084             cc = i->ARMin.LdSt8U.cc;
3085          }
3086          vassert(cc != ARMcc_NV);
3087          if (am->tag == ARMam1_RI) {
3088             Int  simm12;
3089             UInt instr, bP;
3090             if (am->ARMam1.RI.simm13 < 0) {
3091                bP = 0;
3092                simm12 = -am->ARMam1.RI.simm13;
3093             } else {
3094                bP = 1;
3095                simm12 = am->ARMam1.RI.simm13;
3096             }
3097             vassert(simm12 >= 0 && simm12 <= 4095);
3098             instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3099                              iregEnc(am->ARMam1.RI.reg),
3100                              iregEnc(rD));
3101             instr |= simm12;
3102             *p++ = instr;
3103             goto done;
3104          } else {
3105             // RR case
3106             goto bad;
3107          }
3108       }
3109       case ARMin_LdSt16: {
3110          HReg        rD = i->ARMin.LdSt16.rD;
3111          UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3112          UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3113          ARMAMode2*  am = i->ARMin.LdSt16.amode;
3114          ARMCondCode cc = i->ARMin.LdSt16.cc;
3115          vassert(cc != ARMcc_NV);
3116          if (am->tag == ARMam2_RI) {
3117             HReg rN = am->ARMam2.RI.reg;
3118             Int  simm8;
3119             UInt bP, imm8hi, imm8lo, instr;
3120             if (am->ARMam2.RI.simm9 < 0) {
3121                bP = 0;
3122                simm8 = -am->ARMam2.RI.simm9;
3123             } else {
3124                bP = 1;
3125                simm8 = am->ARMam2.RI.simm9;
3126             }
3127             vassert(simm8 >= 0 && simm8 <= 255);
3128             imm8hi = (simm8 >> 4) & 0xF;
3129             imm8lo = simm8 & 0xF;
3130             vassert(!(bL == 0 && bS == 1)); // "! signed store"
3131             /**/ if (bL == 0 && bS == 0) {
3132                // strh
3133                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3134                                 iregEnc(rD), imm8hi, X1011, imm8lo);
3135                *p++ = instr;
3136                goto done;
3137             }
3138             else if (bL == 1 && bS == 0) {
3139                // ldrh
3140                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3141                                 iregEnc(rD), imm8hi, X1011, imm8lo);
3142                *p++ = instr;
3143                goto done;
3144             }
3145             else if (bL == 1 && bS == 1) {
3146                // ldrsh
3147                instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3148                                 iregEnc(rD), imm8hi, X1111, imm8lo);
3149                *p++ = instr;
3150                goto done;
3151             }
3152             else vassert(0); // ill-constructed insn
3153          } else {
3154             // RR case
3155             goto bad;
3156          }
3157       }
3158       case ARMin_Ld8S: {
3159          HReg        rD = i->ARMin.Ld8S.rD;
3160          ARMAMode2*  am = i->ARMin.Ld8S.amode;
3161          ARMCondCode cc = i->ARMin.Ld8S.cc;
3162          vassert(cc != ARMcc_NV);
3163          if (am->tag == ARMam2_RI) {
3164             HReg rN = am->ARMam2.RI.reg;
3165             Int  simm8;
3166             UInt bP, imm8hi, imm8lo, instr;
3167             if (am->ARMam2.RI.simm9 < 0) {
3168                bP = 0;
3169                simm8 = -am->ARMam2.RI.simm9;
3170             } else {
3171                bP = 1;
3172                simm8 = am->ARMam2.RI.simm9;
3173             }
3174             vassert(simm8 >= 0 && simm8 <= 255);
3175             imm8hi = (simm8 >> 4) & 0xF;
3176             imm8lo = simm8 & 0xF;
3177             // ldrsb
3178             instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3179                              iregEnc(rD), imm8hi, X1101, imm8lo);
3180             *p++ = instr;
3181             goto done;
3182          } else {
3183             // RR case
3184             goto bad;
3185          }
3186       }
3187 
3188       case ARMin_XDirect: {
3189          /* NB: what goes on here has to be very closely coordinated
3190             with the chainXDirect_ARM and unchainXDirect_ARM below. */
3191          /* We're generating chain-me requests here, so we need to be
3192             sure this is actually allowed -- no-redir translations
3193             can't use chain-me's.  Hence: */
3194          vassert(disp_cp_chain_me_to_slowEP != NULL);
3195          vassert(disp_cp_chain_me_to_fastEP != NULL);
3196 
3197          /* Use ptmp for backpatching conditional jumps. */
3198          UInt* ptmp = NULL;
3199 
3200          /* First off, if this is conditional, create a conditional
3201             jump over the rest of it.  Or at least, leave a space for
3202             it that we will shortly fill in. */
3203          if (i->ARMin.XDirect.cond != ARMcc_AL) {
3204             vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3205             ptmp = p;
3206             *p++ = 0;
3207          }
3208 
3209          /* Update the guest R15T. */
3210          /* movw r12, lo16(dstGA) */
3211          /* movt r12, hi16(dstGA) */
3212          /* str r12, amR15T */
3213          p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3214          p = do_load_or_store32(p, False/*!isLoad*/,
3215                                 /*r*/12, i->ARMin.XDirect.amR15T);
3216 
3217          /* --- FIRST PATCHABLE BYTE follows --- */
3218          /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3219             calling to) backs up the return address, so as to find the
3220             address of the first patchable byte.  So: don't change the
3221             number of instructions (3) below. */
3222          /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3223          /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3224          /* blx  r12  (A1) */
3225          const void* disp_cp_chain_me
3226                   = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3227                                               : disp_cp_chain_me_to_slowEP;
3228          p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3229                                     (UInt)(Addr)disp_cp_chain_me);
3230          *p++ = 0xE12FFF3C;
3231          /* --- END of PATCHABLE BYTES --- */
3232 
3233          /* Fix up the conditional jump, if there was one. */
3234          if (i->ARMin.XDirect.cond != ARMcc_AL) {
3235             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3236             vassert(delta > 0 && delta < 40);
3237             vassert((delta & 3) == 0);
3238             UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3239             vassert(notCond <= 13); /* Neither AL nor NV */
3240             delta = (delta >> 2) - 2;
3241             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3242          }
3243          goto done;
3244       }
3245 
3246       case ARMin_XIndir: {
3247          /* We're generating transfers that could lead indirectly to a
3248             chain-me, so we need to be sure this is actually allowed
3249             -- no-redir translations are not allowed to reach normal
3250             translations without going through the scheduler.  That
3251             means no XDirects or XIndirs out from no-redir
3252             translations.  Hence: */
3253          vassert(disp_cp_xindir != NULL);
3254 
3255          /* Use ptmp for backpatching conditional jumps. */
3256          UInt* ptmp = NULL;
3257 
3258          /* First off, if this is conditional, create a conditional
3259             jump over the rest of it.  Or at least, leave a space for
3260             it that we will shortly fill in. */
3261          if (i->ARMin.XIndir.cond != ARMcc_AL) {
3262             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3263             ptmp = p;
3264             *p++ = 0;
3265          }
3266 
3267          /* Update the guest R15T. */
3268          /* str r-dstGA, amR15T */
3269          p = do_load_or_store32(p, False/*!isLoad*/,
3270                                 iregEnc(i->ARMin.XIndir.dstGA),
3271                                 i->ARMin.XIndir.amR15T);
3272 
3273          /* movw r12, lo16(VG_(disp_cp_xindir)) */
3274          /* movt r12, hi16(VG_(disp_cp_xindir)) */
3275          /* bx   r12  (A1) */
3276          p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3277          *p++ = 0xE12FFF1C;
3278 
3279          /* Fix up the conditional jump, if there was one. */
3280          if (i->ARMin.XIndir.cond != ARMcc_AL) {
3281             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3282             vassert(delta > 0 && delta < 40);
3283             vassert((delta & 3) == 0);
3284             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3285             vassert(notCond <= 13); /* Neither AL nor NV */
3286             delta = (delta >> 2) - 2;
3287             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3288          }
3289          goto done;
3290       }
3291 
3292       case ARMin_XAssisted: {
3293          /* Use ptmp for backpatching conditional jumps. */
3294          UInt* ptmp = NULL;
3295 
3296          /* First off, if this is conditional, create a conditional
3297             jump over the rest of it.  Or at least, leave a space for
3298             it that we will shortly fill in. */
3299          if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3300             vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3301             ptmp = p;
3302             *p++ = 0;
3303          }
3304 
3305          /* Update the guest R15T. */
3306          /* str r-dstGA, amR15T */
3307          p = do_load_or_store32(p, False/*!isLoad*/,
3308                                 iregEnc(i->ARMin.XAssisted.dstGA),
3309                                 i->ARMin.XAssisted.amR15T);
3310 
3311          /* movw r8,  $magic_number */
3312          UInt trcval = 0;
3313          switch (i->ARMin.XAssisted.jk) {
3314             case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
3315             case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3316             //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
3317             case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
3318             //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
3319             //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
3320             case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
3321             case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3322             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
3323             //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
3324             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
3325             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
3326             /* We don't expect to see the following being assisted. */
3327             //case Ijk_Ret:
3328             //case Ijk_Call:
3329             /* fallthrough */
3330             default:
3331                ppIRJumpKind(i->ARMin.XAssisted.jk);
3332                vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3333          }
3334          vassert(trcval != 0);
3335          p = imm32_to_ireg(p, /*r*/8, trcval);
3336 
3337          /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3338          /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3339          /* bx   r12  (A1) */
3340          p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3341          *p++ = 0xE12FFF1C;
3342 
3343          /* Fix up the conditional jump, if there was one. */
3344          if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3345             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3346             vassert(delta > 0 && delta < 40);
3347             vassert((delta & 3) == 0);
3348             UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3349             vassert(notCond <= 13); /* Neither AL nor NV */
3350             delta = (delta >> 2) - 2;
3351             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3352          }
3353          goto done;
3354       }
3355 
3356       case ARMin_CMov: {
3357          UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
3358          UInt subopc = X1101; /* MOV */
3359          UInt SBZ    = 0;
3360          instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3361                            (subopc << 1) & 0xF, SBZ,
3362                            iregEnc(i->ARMin.CMov.dst));
3363          *p++ = instr;
3364          goto done;
3365       }
3366 
3367       case ARMin_Call: {
3368          UInt instr;
3369          /* Decide on a scratch reg used to hold to the call address.
3370             This has to be done as per the comments in getRegUsage. */
3371          Int scratchNo;
3372          switch (i->ARMin.Call.nArgRegs) {
3373             case 0:  scratchNo = 0;  break;
3374             case 1:  scratchNo = 1;  break;
3375             case 2:  scratchNo = 2;  break;
3376             case 3:  scratchNo = 3;  break;
3377             case 4:  scratchNo = 11; break;
3378             default: vassert(0);
3379          }
3380          /* If we don't need to do any fixup actions in the case that
3381             the call doesn't happen, just do the simple thing and emit
3382             straight-line code.  We hope this is the common case. */
3383          if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3384              || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3385             // r"scratchNo" = &target
3386             p = imm32_to_ireg( (UInt*)p,
3387                                scratchNo, (UInt)i->ARMin.Call.target );
3388             // blx{cond} r"scratchNo"
3389             instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3390                              X0011, scratchNo);
3391             instr |= 0xFFF << 8; // stick in the SBOnes
3392             *p++ = instr;
3393          } else {
3394             Int delta;
3395             /* Complex case.  We have to generate an if-then-else
3396                diamond. */
3397             // before:
3398             //   b{!cond} else:
3399             //   r"scratchNo" = &target
3400             //   blx{AL} r"scratchNo"
3401             // preElse:
3402             //   b after:
3403             // else:
3404             //   mov r0, #0x55555555  // possibly
3405             //   mov r1, r0           // possibly
3406             // after:
3407 
3408             // before:
3409             UInt* pBefore = p;
3410 
3411             //   b{!cond} else:  // ptmp1 points here
3412             *p++ = 0; // filled in later
3413 
3414             //   r"scratchNo" = &target
3415             p = imm32_to_ireg( (UInt*)p,
3416                                scratchNo, (UInt)i->ARMin.Call.target );
3417 
3418             //   blx{AL} r"scratchNo"
3419             instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3420                              X0011, scratchNo);
3421             instr |= 0xFFF << 8; // stick in the SBOnes
3422             *p++ = instr;
3423 
3424             // preElse:
3425             UInt* pPreElse = p;
3426 
3427             //   b after:
3428             *p++ = 0; // filled in later
3429 
3430             // else:
3431             delta = (UChar*)p - (UChar*)pBefore;
3432             delta = (delta >> 2) - 2;
3433             *pBefore
3434                = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3435 
3436             /* Do the 'else' actions */
3437             switch (i->ARMin.Call.rloc.pri) {
3438                case RLPri_Int:
3439                   p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3440                   break;
3441                case RLPri_2Int:
3442                   vassert(0); //ATC
3443                   p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3444                   /* mov r1, r0 */
3445                   *p++ = 0xE1A01000;
3446                   break;
3447                case RLPri_None: case RLPri_INVALID: default:
3448                   vassert(0);
3449             }
3450 
3451             // after:
3452             delta = (UChar*)p - (UChar*)pPreElse;
3453             delta = (delta >> 2) - 2;
3454             *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3455          }
3456 
3457          goto done;
3458       }
3459 
3460       case ARMin_Mul: {
3461          /* E0000392   mul     r0, r2, r3
3462             E0810392   umull   r0(LO), r1(HI), r2, r3
3463             E0C10392   smull   r0(LO), r1(HI), r2, r3
3464          */
3465          switch (i->ARMin.Mul.op) {
3466             case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3467             case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
3468             case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
3469             default: vassert(0);
3470          }
3471          goto bad;
3472       }
3473       case ARMin_LdrEX: {
3474          /* E1D42F9F   ldrexb r2, [r4]
3475             E1F42F9F   ldrexh r2, [r4]
3476             E1942F9F   ldrex  r2, [r4]
3477             E1B42F9F   ldrexd r2, r3, [r4]
3478          */
3479          switch (i->ARMin.LdrEX.szB) {
3480             case 1: *p++ = 0xE1D42F9F; goto done;
3481             case 2: *p++ = 0xE1F42F9F; goto done;
3482             case 4: *p++ = 0xE1942F9F; goto done;
3483             case 8: *p++ = 0xE1B42F9F; goto done;
3484             default: break;
3485          }
3486          goto bad;
3487       }
3488       case ARMin_StrEX: {
3489          /* E1C40F92   strexb r0, r2, [r4]
3490             E1E40F92   strexh r0, r2, [r4]
3491             E1840F92   strex  r0, r2, [r4]
3492             E1A40F92   strexd r0, r2, r3, [r4]
3493          */
3494          switch (i->ARMin.StrEX.szB) {
3495             case 1: *p++ = 0xE1C40F92; goto done;
3496             case 2: *p++ = 0xE1E40F92; goto done;
3497             case 4: *p++ = 0xE1840F92; goto done;
3498             case 8: *p++ = 0xE1A40F92; goto done;
3499             default: break;
3500          }
3501          goto bad;
3502       }
3503       case ARMin_VLdStD: {
3504          UInt dD     = dregEnc(i->ARMin.VLdStD.dD);
3505          UInt rN     = iregEnc(i->ARMin.VLdStD.amode->reg);
3506          Int  simm11 = i->ARMin.VLdStD.amode->simm11;
3507          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3508          UInt bU     = simm11 >= 0 ? 1 : 0;
3509          UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
3510          UInt insn;
3511          vassert(0 == (off8 & 3));
3512          off8 >>= 2;
3513          vassert(0 == (off8 & 0xFFFFFF00));
3514          insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3515          insn |= off8;
3516          *p++ = insn;
3517          goto done;
3518       }
3519       case ARMin_VLdStS: {
3520          UInt fD     = fregEnc(i->ARMin.VLdStS.fD);
3521          UInt rN     = iregEnc(i->ARMin.VLdStS.amode->reg);
3522          Int  simm11 = i->ARMin.VLdStS.amode->simm11;
3523          UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3524          UInt bU     = simm11 >= 0 ? 1 : 0;
3525          UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
3526          UInt bD     = fD & 1;
3527          UInt insn;
3528          vassert(0 == (off8 & 3));
3529          off8 >>= 2;
3530          vassert(0 == (off8 & 0xFFFFFF00));
3531          insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3532          insn |= off8;
3533          *p++ = insn;
3534          goto done;
3535       }
3536       case ARMin_VAluD: {
3537          UInt dN = dregEnc(i->ARMin.VAluD.argL);
3538          UInt dD = dregEnc(i->ARMin.VAluD.dst);
3539          UInt dM = dregEnc(i->ARMin.VAluD.argR);
3540          UInt pqrs = X1111; /* undefined */
3541          switch (i->ARMin.VAluD.op) {
3542             case ARMvfp_ADD: pqrs = X0110; break;
3543             case ARMvfp_SUB: pqrs = X0111; break;
3544             case ARMvfp_MUL: pqrs = X0100; break;
3545             case ARMvfp_DIV: pqrs = X1000; break;
3546             default: goto bad;
3547          }
3548          vassert(pqrs != X1111);
3549          UInt bP  = (pqrs >> 3) & 1;
3550          UInt bQ  = (pqrs >> 2) & 1;
3551          UInt bR  = (pqrs >> 1) & 1;
3552          UInt bS  = (pqrs >> 0) & 1;
3553          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3554                               X1011, BITS4(0,bS,0,0), dM);
3555          *p++ = insn;
3556          goto done;
3557       }
3558       case ARMin_VAluS: {
3559          UInt dN = fregEnc(i->ARMin.VAluS.argL);
3560          UInt dD = fregEnc(i->ARMin.VAluS.dst);
3561          UInt dM = fregEnc(i->ARMin.VAluS.argR);
3562          UInt bN = dN & 1;
3563          UInt bD = dD & 1;
3564          UInt bM = dM & 1;
3565          UInt pqrs = X1111; /* undefined */
3566          switch (i->ARMin.VAluS.op) {
3567             case ARMvfp_ADD: pqrs = X0110; break;
3568             case ARMvfp_SUB: pqrs = X0111; break;
3569             case ARMvfp_MUL: pqrs = X0100; break;
3570             case ARMvfp_DIV: pqrs = X1000; break;
3571             default: goto bad;
3572          }
3573          vassert(pqrs != X1111);
3574          UInt bP  = (pqrs >> 3) & 1;
3575          UInt bQ  = (pqrs >> 2) & 1;
3576          UInt bR  = (pqrs >> 1) & 1;
3577          UInt bS  = (pqrs >> 0) & 1;
3578          UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3579                               (dN >> 1), (dD >> 1),
3580                               X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3581          *p++ = insn;
3582          goto done;
3583       }
3584       case ARMin_VUnaryD: {
3585          UInt dD   = dregEnc(i->ARMin.VUnaryD.dst);
3586          UInt dM   = dregEnc(i->ARMin.VUnaryD.src);
3587          UInt insn = 0;
3588          switch (i->ARMin.VUnaryD.op) {
3589             case ARMvfpu_COPY:
3590                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3591                break;
3592             case ARMvfpu_ABS:
3593                insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3594                break;
3595             case ARMvfpu_NEG:
3596                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3597                break;
3598             case ARMvfpu_SQRT:
3599                insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3600                break;
3601             default:
3602                goto bad;
3603          }
3604          *p++ = insn;
3605          goto done;
3606       }
3607       case ARMin_VUnaryS: {
3608          UInt fD   = fregEnc(i->ARMin.VUnaryS.dst);
3609          UInt fM   = fregEnc(i->ARMin.VUnaryS.src);
3610          UInt insn = 0;
3611          switch (i->ARMin.VUnaryS.op) {
3612             case ARMvfpu_COPY:
3613                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3614                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3615                                (fM >> 1));
3616                break;
3617             case ARMvfpu_ABS:
3618                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3619                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3620                                (fM >> 1));
3621                break;
3622             case ARMvfpu_NEG:
3623                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3624                                (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3625                                (fM >> 1));
3626                break;
3627             case ARMvfpu_SQRT:
3628                insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3629                                (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3630                                (fM >> 1));
3631                break;
3632             default:
3633                goto bad;
3634          }
3635          *p++ = insn;
3636          goto done;
3637       }
3638       case ARMin_VCmpD: {
3639          UInt dD   = dregEnc(i->ARMin.VCmpD.argL);
3640          UInt dM   = dregEnc(i->ARMin.VCmpD.argR);
3641          UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3642          *p++ = insn;       /* FCMPD dD, dM */
3643          *p++ = 0xEEF1FA10; /* FMSTAT */
3644          goto done;
3645       }
3646       case ARMin_VCMovD: {
3647          UInt cc = (UInt)i->ARMin.VCMovD.cond;
3648          UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3649          UInt dM = dregEnc(i->ARMin.VCMovD.src);
3650          vassert(cc < 16 && cc != ARMcc_AL);
3651          UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3652          *p++ = insn;
3653          goto done;
3654       }
3655       case ARMin_VCMovS: {
3656          UInt cc = (UInt)i->ARMin.VCMovS.cond;
3657          UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3658          UInt fM = fregEnc(i->ARMin.VCMovS.src);
3659          vassert(cc < 16 && cc != ARMcc_AL);
3660          UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3661                               X0000,(fD >> 1),X1010,
3662                               BITS4(0,1,(fM & 1),0), (fM >> 1));
3663          *p++ = insn;
3664          goto done;
3665       }
3666       case ARMin_VCvtSD: {
3667          if (i->ARMin.VCvtSD.sToD) {
3668             UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3669             UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3670             UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3671                                  BITS4(1,1, (fM & 1), 0),
3672                                  (fM >> 1));
3673             *p++ = insn;
3674             goto done;
3675          } else {
3676             UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3677             UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3678             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3679                                  X0111, (fD >> 1),
3680                                  X1011, X1100, dM);
3681             *p++ = insn;
3682             goto done;
3683          }
3684       }
3685       case ARMin_VXferD: {
3686          UInt dD  = dregEnc(i->ARMin.VXferD.dD);
3687          UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3688          UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3689          /* vmov dD, rLo, rHi is
3690             E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3691             vmov rLo, rHi, dD is
3692             E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3693          */
3694          UInt insn
3695             = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3696                        rHi, rLo, 0xB,
3697                        BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3698          *p++ = insn;
3699          goto done;
3700       }
3701       case ARMin_VXferS: {
3702          UInt fD  = fregEnc(i->ARMin.VXferS.fD);
3703          UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3704          /* vmov fD, rLo is
3705             E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3706             vmov rLo, fD is
3707             E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3708          */
3709          UInt insn
3710             = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3711                        (fD >> 1) & 0xF, rLo, 0xA,
3712                        BITS4((fD & 1),0,0,1), 0);
3713          *p++ = insn;
3714          goto done;
3715       }
3716       case ARMin_VCvtID: {
3717          Bool iToD = i->ARMin.VCvtID.iToD;
3718          Bool syned = i->ARMin.VCvtID.syned;
3719          if (iToD && syned) {
3720             // FSITOD: I32S-in-freg to F64-in-dreg
3721             UInt regF = fregEnc(i->ARMin.VCvtID.src);
3722             UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3723             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3724                                  X1011, BITS4(1,1,(regF & 1),0),
3725                                  (regF >> 1) & 0xF);
3726             *p++ = insn;
3727             goto done;
3728          }
3729          if (iToD && (!syned)) {
3730             // FUITOD: I32U-in-freg to F64-in-dreg
3731             UInt regF = fregEnc(i->ARMin.VCvtID.src);
3732             UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3733             UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3734                                  X1011, BITS4(0,1,(regF & 1),0),
3735                                  (regF >> 1) & 0xF);
3736             *p++ = insn;
3737             goto done;
3738          }
3739          if ((!iToD) && syned) {
3740             // FTOSID: F64-in-dreg to I32S-in-freg
3741             UInt regD = dregEnc(i->ARMin.VCvtID.src);
3742             UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3743             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3744                                  X1101, (regF >> 1) & 0xF,
3745                                  X1011, X0100, regD);
3746             *p++ = insn;
3747             goto done;
3748          }
3749          if ((!iToD) && (!syned)) {
3750             // FTOUID: F64-in-dreg to I32U-in-freg
3751             UInt regD = dregEnc(i->ARMin.VCvtID.src);
3752             UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3753             UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3754                                  X1100, (regF >> 1) & 0xF,
3755                                  X1011, X0100, regD);
3756             *p++ = insn;
3757             goto done;
3758          }
3759          /*UNREACHED*/
3760          vassert(0);
3761       }
3762       case ARMin_FPSCR: {
3763          Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3764          UInt iReg    = iregEnc(i->ARMin.FPSCR.iReg);
3765          if (toFPSCR) {
3766             /* fmxr fpscr, iReg is EEE1 iReg A10 */
3767             *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3768             goto done;
3769          }
3770          goto bad; // FPSCR -> iReg case currently ATC
3771       }
3772       case ARMin_MFence: {
3773          // It's not clear (to me) how these relate to the ARMv7
3774          // versions, so let's just use the v7 versions as they
3775          // are at least well documented.
3776          //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3777          //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3778          //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
3779          *p++ = 0xF57FF04F; /* DSB sy */
3780          *p++ = 0xF57FF05F; /* DMB sy */
3781          *p++ = 0xF57FF06F; /* ISB */
3782          goto done;
3783       }
3784       case ARMin_CLREX: {
3785          *p++ = 0xF57FF01F; /* clrex */
3786          goto done;
3787       }
3788 
3789       case ARMin_NLdStQ: {
3790          UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
3791          UInt regN, regM;
3792          UInt D = regD >> 4;
3793          UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3794          UInt insn;
3795          vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3796          regD &= 0xF;
3797          if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3798             regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3799             regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3800          } else {
3801             regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3802             regM = 15;
3803          }
3804          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3805                               regN, regD, X1010, X1000, regM);
3806          *p++ = insn;
3807          goto done;
3808       }
3809       case ARMin_NLdStD: {
3810          UInt regD = dregEnc(i->ARMin.NLdStD.dD);
3811          UInt regN, regM;
3812          UInt D = regD >> 4;
3813          UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3814          UInt insn;
3815          vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3816          regD &= 0xF;
3817          if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3818             regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3819             regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3820          } else {
3821             regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3822             regM = 15;
3823          }
3824          insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3825                               regN, regD, X0111, X1000, regM);
3826          *p++ = insn;
3827          goto done;
3828       }
3829       case ARMin_NUnaryS: {
3830          UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3831          UInt regD, D;
3832          UInt regM, M;
3833          UInt size = i->ARMin.NUnaryS.size;
3834          UInt insn;
3835          UInt opc, opc1, opc2;
3836          switch (i->ARMin.NUnaryS.op) {
3837 	    case ARMneon_VDUP:
3838                if (i->ARMin.NUnaryS.size >= 16)
3839                   goto bad;
3840                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3841                   goto bad;
3842                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3843                   goto bad;
3844                regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3845                         ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
3846                         : dregEnc(i->ARMin.NUnaryS.dst->reg);
3847                regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3848                         ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
3849                         : dregEnc(i->ARMin.NUnaryS.src->reg);
3850                D = regD >> 4;
3851                M = regM >> 4;
3852                regD &= 0xf;
3853                regM &= 0xf;
3854                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3855                                (i->ARMin.NUnaryS.size & 0xf), regD,
3856                                X1100, BITS4(0,Q,M,0), regM);
3857                *p++ = insn;
3858                goto done;
3859             case ARMneon_SETELEM:
3860                regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
3861                                 dregEnc(i->ARMin.NUnaryS.dst->reg);
3862                regM = iregEnc(i->ARMin.NUnaryS.src->reg);
3863                M = regM >> 4;
3864                D = regD >> 4;
3865                regM &= 0xF;
3866                regD &= 0xF;
3867                if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3868                   goto bad;
3869                switch (size) {
3870                   case 0:
3871                      if (i->ARMin.NUnaryS.dst->index > 7)
3872                         goto bad;
3873                      opc = X1000 | i->ARMin.NUnaryS.dst->index;
3874                      break;
3875                   case 1:
3876                      if (i->ARMin.NUnaryS.dst->index > 3)
3877                         goto bad;
3878                      opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3879                      break;
3880                   case 2:
3881                      if (i->ARMin.NUnaryS.dst->index > 1)
3882                         goto bad;
3883                      opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3884                      break;
3885                   default:
3886                      goto bad;
3887                }
3888                opc1 = (opc >> 2) & 3;
3889                opc2 = opc & 3;
3890                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3891                                regD, regM, X1011,
3892                                BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3893                *p++ = insn;
3894                goto done;
3895             case ARMneon_GETELEMU:
3896                regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3897                                 dregEnc(i->ARMin.NUnaryS.src->reg);
3898                regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3899                M = regM >> 4;
3900                D = regD >> 4;
3901                regM &= 0xF;
3902                regD &= 0xF;
3903                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3904                   goto bad;
3905                switch (size) {
3906                   case 0:
3907                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
3908                         regM++;
3909                         i->ARMin.NUnaryS.src->index -= 8;
3910                      }
3911                      if (i->ARMin.NUnaryS.src->index > 7)
3912                         goto bad;
3913                      opc = X1000 | i->ARMin.NUnaryS.src->index;
3914                      break;
3915                   case 1:
3916                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
3917                         regM++;
3918                         i->ARMin.NUnaryS.src->index -= 4;
3919                      }
3920                      if (i->ARMin.NUnaryS.src->index > 3)
3921                         goto bad;
3922                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3923                      break;
3924                   case 2:
3925                      goto bad;
3926                   default:
3927                      goto bad;
3928                }
3929                opc1 = (opc >> 2) & 3;
3930                opc2 = opc & 3;
3931                insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3932                                regM, regD, X1011,
3933                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3934                *p++ = insn;
3935                goto done;
3936             case ARMneon_GETELEMS:
3937                regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3938                                 dregEnc(i->ARMin.NUnaryS.src->reg);
3939                regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3940                M = regM >> 4;
3941                D = regD >> 4;
3942                regM &= 0xF;
3943                regD &= 0xF;
3944                if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3945                   goto bad;
3946                switch (size) {
3947                   case 0:
3948                      if (Q && i->ARMin.NUnaryS.src->index > 7) {
3949                         regM++;
3950                         i->ARMin.NUnaryS.src->index -= 8;
3951                      }
3952                      if (i->ARMin.NUnaryS.src->index > 7)
3953                         goto bad;
3954                      opc = X1000 | i->ARMin.NUnaryS.src->index;
3955                      break;
3956                   case 1:
3957                      if (Q && i->ARMin.NUnaryS.src->index > 3) {
3958                         regM++;
3959                         i->ARMin.NUnaryS.src->index -= 4;
3960                      }
3961                      if (i->ARMin.NUnaryS.src->index > 3)
3962                         goto bad;
3963                      opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3964                      break;
3965                   case 2:
3966                      if (Q && i->ARMin.NUnaryS.src->index > 1) {
3967                         regM++;
3968                         i->ARMin.NUnaryS.src->index -= 2;
3969                      }
3970                      if (i->ARMin.NUnaryS.src->index > 1)
3971                         goto bad;
3972                      opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3973                      break;
3974                   default:
3975                      goto bad;
3976                }
3977                opc1 = (opc >> 2) & 3;
3978                opc2 = opc & 3;
3979                insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3980                                regM, regD, X1011,
3981                                BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3982                *p++ = insn;
3983                goto done;
3984             default:
3985                goto bad;
3986          }
3987       }
3988       case ARMin_NUnary: {
              /* Emit one 32-bit word for a NEON unary operation.  The
                 destination register number is computed in full first (a Q
                 register's encoding is doubled); everything above its low
                 four bits is kept in D and the low four bits stay in regD.
                 The source is handled the same way (M / regM), except that
                 for ARMneon_DUP the source is encoded with iregEnc instead.
                 An op with no case below jumps to 'bad'. */
3989          UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
3990          UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
3991                        ? (qregEnc(i->ARMin.NUnary.dst) << 1)
3992                        : dregEnc(i->ARMin.NUnary.dst);
3993          UInt regM, M;
3994          UInt D = regD >> 4;
              /* sz1 / sz2: the upper part and the lowest bit of 'size'.
                 Several cases below overwrite them with other bits of 'size'
                 before building the instruction word. */
3995          UInt sz1 = i->ARMin.NUnary.size >> 1;
3996          UInt sz2 = i->ARMin.NUnary.size & 1;
3997          UInt sz = i->ARMin.NUnary.size;
3998          UInt insn;
3999          UInt F = 0; /* TODO: floating point EQZ ??? */
4000          if (i->ARMin.NUnary.op != ARMneon_DUP) {
4001             regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4002                      ? (qregEnc(i->ARMin.NUnary.src) << 1)
4003                      : dregEnc(i->ARMin.NUnary.src);
4004             M = regM >> 4;
4005          } else {
4006             regM = iregEnc(i->ARMin.NUnary.src);
4007             M = regM >> 4;
4008          }
              /* From here on regD / regM hold only the low four bits. */
4009          regD &= 0xF;
4010          regM &= 0xF;
4011          switch (i->ARMin.NUnary.op) {
4012             case ARMneon_COPY: /* VMOV reg, reg */
                    /* regM is passed twice: as the fourth and the last
                       nibble, with M in both bit slots of the seventh. */
4013                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4014                                BITS4(M,Q,M,1), regM);
4015                break;
4016             case ARMneon_COPYN: /* VMOVN regD, regQ */
4017                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4018                                regD, X0010, BITS4(0,0,M,0), regM);
4019                break;
4020             case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4021                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4022                                regD, X0010, BITS4(1,0,M,0), regM);
4023                break;
4024             case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4025                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4026                                regD, X0010, BITS4(0,1,M,0), regM);
4027                break;
4028             case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4029                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4030                                regD, X0010, BITS4(1,1,M,0), regM);
4031                break;
4032             case ARMneon_COPYLS: /* VMOVL regQ, regD */
                    /* Only sizes 0..2 are accepted; the size selects which
                       one of three single bits is set in the word. */
4033                if (sz >= 3)
4034                   goto bad;
4035                insn = XXXXXXXX(0xF, X0010,
4036                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4037                                BITS4((sz == 0) ? 1 : 0,0,0,0),
4038                                regD, X1010, BITS4(0,0,M,1), regM);
4039                break;
4040             case ARMneon_COPYLU: /* VMOVL regQ, regD */
                    /* Same layout as COPYLS apart from the second nibble. */
4041                if (sz >= 3)
4042                   goto bad;
4043                insn = XXXXXXXX(0xF, X0011,
4044                                BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4045                                BITS4((sz == 0) ? 1 : 0,0,0,0),
4046                                regD, X1010, BITS4(0,0,M,1), regM);
4047                break;
4048             case ARMneon_NOT: /* VMVN reg, reg*/
4049                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4050                                BITS4(1,Q,M,0), regM);
4051                break;
4052             case ARMneon_EQZ:
                    /* F is always 0 here (see the TODO at its declaration). */
4053                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4054                                regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4055                break;
4056             case ARMneon_CNT:
4057                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4058                                BITS4(0,Q,M,0), regM);
4059                break;
4060             case ARMneon_CLZ:
4061                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4062                                regD, X0100, BITS4(1,Q,M,0), regM);
4063                break;
4064             case ARMneon_CLS:
4065                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4066                                regD, X0100, BITS4(0,Q,M,0), regM);
4067                break;
4068             case ARMneon_ABS:
4069                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4070                                regD, X0011, BITS4(0,Q,M,0), regM);
4071                break;
4072             case ARMneon_DUP:
                    /* Source is an integer register (see iregEnc above), and
                       regD / regM swap positions relative to the other cases.
                       sz1 is set only for size 0 and sz2 only for size 1.
                       NOTE(review): sz1 and sz2 can never both be 1, so the
                       assertion below always holds; any size other than 0 or
                       1 is encoded with both bits clear.  Presumably a range
                       check on size was intended -- confirm. */
4073                sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4074                sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4075                vassert(sz1 + sz2 < 2);
4076                insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4077                                X1011, BITS4(D,0,sz2,1), X0000);
4078                break;
4079             case ARMneon_REV16:
4080                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4081                                regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4082                break;
4083             case ARMneon_REV32:
4084                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4085                                regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4086                break;
4087             case ARMneon_REV64:
4088                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4089                                regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4090                break;
4091             case ARMneon_PADDLU:
4092                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4093                                regD, X0010, BITS4(1,Q,M,0), regM);
4094                break;
4095             case ARMneon_PADDLS:
4096                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4097                                regD, X0010, BITS4(0,Q,M,0), regM);
4098                break;
                 /* For the three VQSHLN* cases 'size' is not used as an
                    element size: its bits 0-3, bits 4-5 and the part from
                    bit 6 upwards are placed in three separate fields
                    (presumably a packed shift immediate -- confirm against
                    the instruction selector). */
4099             case ARMneon_VQSHLNUU:
4100                insn = XXXXXXXX(0xF, X0011,
4101                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4102                                sz & 0xf, regD, X0111,
4103                                BITS4(sz >> 6,Q,M,1), regM);
4104                break;
4105             case ARMneon_VQSHLNSS:
4106                insn = XXXXXXXX(0xF, X0010,
4107                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4108                                sz & 0xf, regD, X0111,
4109                                BITS4(sz >> 6,Q,M,1), regM);
4110                break;
4111             case ARMneon_VQSHLNUS:
4112                insn = XXXXXXXX(0xF, X0011,
4113                                (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4114                                sz & 0xf, regD, X0110,
4115                                BITS4(sz >> 6,Q,M,1), regM);
4116                break;
4117             case ARMneon_VCVTFtoS:
4118                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4119                                BITS4(0,Q,M,0), regM);
4120                break;
4121             case ARMneon_VCVTFtoU:
4122                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4123                                BITS4(1,Q,M,0), regM);
4124                break;
4125             case ARMneon_VCVTStoF:
4126                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4127                                BITS4(0,Q,M,0), regM);
4128                break;
4129             case ARMneon_VCVTUtoF:
4130                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4131                                BITS4(1,Q,M,0), regM);
4132                break;
                 /* The four fixed-point conversions below reuse sz1 / sz2 for
                    bits 5 and 4 of 'size' and reduce sz to its low four
                    bits before building the word. */
4133             case ARMneon_VCVTFtoFixedU:
4134                sz1 = (sz >> 5) & 1;
4135                sz2 = (sz >> 4) & 1;
4136                sz &= 0xf;
4137                insn = XXXXXXXX(0xF, X0011,
4138                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
4139                                BITS4(0,Q,M,1), regM);
4140                break;
4141             case ARMneon_VCVTFtoFixedS:
4142                sz1 = (sz >> 5) & 1;
4143                sz2 = (sz >> 4) & 1;
4144                sz &= 0xf;
4145                insn = XXXXXXXX(0xF, X0010,
4146                                BITS4(1,D,sz1,sz2), sz, regD, X1111,
4147                                BITS4(0,Q,M,1), regM);
4148                break;
4149             case ARMneon_VCVTFixedUtoF:
4150                sz1 = (sz >> 5) & 1;
4151                sz2 = (sz >> 4) & 1;
4152                sz &= 0xf;
4153                insn = XXXXXXXX(0xF, X0011,
4154                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
4155                                BITS4(0,Q,M,1), regM);
4156                break;
4157             case ARMneon_VCVTFixedStoF:
4158                sz1 = (sz >> 5) & 1;
4159                sz2 = (sz >> 4) & 1;
4160                sz &= 0xf;
4161                insn = XXXXXXXX(0xF, X0010,
4162                                BITS4(1,D,sz1,sz2), sz, regD, X1110,
4163                                BITS4(0,Q,M,1), regM);
4164                break;
4165             case ARMneon_VCVTF32toF16:
4166                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4167                                BITS4(0,0,M,0), regM);
4168                break;
4169             case ARMneon_VCVTF16toF32:
4170                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4171                                BITS4(0,0,M,0), regM);
4172                break;
4173             case ARMneon_VRECIP:
4174                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4175                                BITS4(0,Q,M,0), regM);
4176                break;
4177             case ARMneon_VRECIPF:
4178                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4179                                BITS4(0,Q,M,0), regM);
4180                break;
4181             case ARMneon_VABSFP:
4182                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4183                                BITS4(0,Q,M,0), regM);
4184                break;
4185             case ARMneon_VRSQRTEFP:
4186                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4187                                BITS4(1,Q,M,0), regM);
4188                break;
4189             case ARMneon_VRSQRTE:
4190                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4191                                BITS4(1,Q,M,0), regM);
4192                break;
4193             case ARMneon_VNEGF:
4194                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4195                                BITS4(1,Q,M,0), regM);
4196                break;
4197 
4198             default:
4199                goto bad;
4200          }
              /* Store the assembled word and advance the output pointer. */
4201          *p++ = insn;
4202          goto done;
4203       }
4204       case ARMin_NDual: {
4205          UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4206          UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4207                        ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4208                        : dregEnc(i->ARMin.NDual.arg1);
4209          UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4210                        ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4211                        : dregEnc(i->ARMin.NDual.arg2);
4212          UInt D = regD >> 4;
4213          UInt M = regM >> 4;
4214          UInt sz1 = i->ARMin.NDual.size >> 1;
4215          UInt sz2 = i->ARMin.NDual.size & 1;
4216          UInt insn;
4217          regD &= 0xF;
4218          regM &= 0xF;
4219          switch (i->ARMin.NDual.op) {
4220             case ARMneon_TRN: /* VTRN reg, reg */
4221                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4222                                regD, X0000, BITS4(1,Q,M,0), regM);
4223                break;
4224             case ARMneon_ZIP: /* VZIP reg, reg */
4225                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4226                                regD, X0001, BITS4(1,Q,M,0), regM);
4227                break;
4228             case ARMneon_UZP: /* VUZP reg, reg */
4229                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4230                                regD, X0001, BITS4(0,Q,M,0), regM);
4231                break;
4232             default:
4233                goto bad;
4234          }
4235          *p++ = insn;
4236          goto done;
4237       }
4238       case ARMin_NBinary: {
              /* Emit one 32-bit word for a three-register NEON operation
                 (dst, argL, argR).  Each register number is computed in full
                 (a Q register's encoding is doubled) and then split:
                 everything above the low four bits goes to D / N / M and the
                 low four bits stay in regD / regN / regM.  sz1 / sz2 are the
                 upper part and the lowest bit of 'size'.  An op with no case
                 below jumps to 'bad'. */
4239          UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4240          UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4241                        ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4242                        : dregEnc(i->ARMin.NBinary.dst);
4243          UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4244                        ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4245                        : dregEnc(i->ARMin.NBinary.argL);
4246          UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4247                        ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4248                        : dregEnc(i->ARMin.NBinary.argR);
4249          UInt sz1 = i->ARMin.NBinary.size >> 1;
4250          UInt sz2 = i->ARMin.NBinary.size & 1;
4251          UInt D = regD >> 4;
4252          UInt N = regN >> 4;
4253          UInt M = regM >> 4;
4254          UInt insn;
              /* From here on regD / regM / regN hold only the low four bits. */
4255          regD &= 0xF;
4256          regM &= 0xF;
4257          regN &= 0xF;
4258          switch (i->ARMin.NBinary.op) {
4259             case ARMneon_VAND: /* VAND reg, reg, reg */
4260                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4261                                BITS4(N,Q,M,1), regM);
4262                break;
4263             case ARMneon_VORR: /* VORR reg, reg, reg*/
4264                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4265                                BITS4(N,Q,M,1), regM);
4266                break;
4267             case ARMneon_VXOR: /* VEOR reg, reg, reg */
4268                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4269                                BITS4(N,Q,M,1), regM);
4270                break;
4271             case ARMneon_VADD: /* VADD reg, reg, reg */
4272                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4273                                X1000, BITS4(N,Q,M,0), regM);
4274                break;
4275             case ARMneon_VSUB: /* VSUB reg, reg, reg */
4276                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4277                                X1000, BITS4(N,Q,M,0), regM);
4278                break;
4279             case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4280                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4281                                X0110, BITS4(N,Q,M,1), regM);
4282                break;
4283             case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4284                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4285                                X0110, BITS4(N,Q,M,1), regM);
4286                break;
4287             case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4288                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4289                                X0110, BITS4(N,Q,M,0), regM);
4290                break;
4291             case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4292                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4293                                X0110, BITS4(N,Q,M,0), regM);
4294                break;
4295             case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4296                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4297                                X0001, BITS4(N,Q,M,0), regM);
4298                break;
4299             case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4300                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4301                                X0001, BITS4(N,Q,M,0), regM);
4302                break;
4303             case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4304                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4305                                X0000, BITS4(N,Q,M,1), regM);
4306                break;
4307             case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4308                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4309                                X0000, BITS4(N,Q,M,1), regM);
4310                break;
4311             case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4312                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4313                                X0010, BITS4(N,Q,M,1), regM);
4314                break;
4315             case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4316                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4317                                X0010, BITS4(N,Q,M,1), regM);
4318                break;
4319             case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4320                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4321                                X0011, BITS4(N,Q,M,0), regM);
4322                break;
4323             case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4324                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4325                                X0011, BITS4(N,Q,M,0), regM);
4326                break;
4327             case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4328                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4329                                X0011, BITS4(N,Q,M,1), regM);
4330                break;
4331             case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4332                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4333                                X0011, BITS4(N,Q,M,1), regM);
4334                break;
4335             case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4336                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4337                                X1000, BITS4(N,Q,M,1), regM);
4338                break;
4339             case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
                    /* Here 'size' carries the 4-bit immediate, not an element
                       size: values of 16 or more are rejected and the value
                       is placed in the sixth nibble. */
4340                if (i->ARMin.NBinary.size >= 16)
4341                   goto bad;
4342                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4343                                i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4344                                regM);
4345                break;
4346             case ARMneon_VMUL:
4347                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4348                                X1001, BITS4(N,Q,M,1), regM);
4349                break;
                 /* The VMULL* and VQDMULL cases pass a literal 0 where most
                    other cases pass Q. */
4350             case ARMneon_VMULLU:
4351                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4352                                X1100, BITS4(N,0,M,0), regM);
4353                break;
4354             case ARMneon_VMULLS:
4355                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4356                                X1100, BITS4(N,0,M,0), regM);
4357                break;
4358             case ARMneon_VMULP:
4359                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4360                                X1001, BITS4(N,Q,M,1), regM);
4361                break;
4362             case ARMneon_VMULFP:
4363                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4364                                X1101, BITS4(N,Q,M,1), regM);
4365                break;
4366             case ARMneon_VMULLP:
4367                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4368                                X1110, BITS4(N,0,M,0), regM);
4369                break;
4370             case ARMneon_VQDMULH:
4371                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4372                                X1011, BITS4(N,Q,M,0), regM);
4373                break;
4374             case ARMneon_VQRDMULH:
4375                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4376                                X1011, BITS4(N,Q,M,0), regM);
4377                break;
4378             case ARMneon_VQDMULL:
4379                insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4380                                X1101, BITS4(N,0,M,0), regM);
4381                break;
4382             case ARMneon_VTBL:
                    /* Neither Q nor the size bits are used for this op. */
4383                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4384                                X1000, BITS4(N,0,M,0), regM);
4385                break;
4386             case ARMneon_VPADD:
4387                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4388                                X1011, BITS4(N,Q,M,1), regM);
4389                break;
4390             case ARMneon_VPADDFP:
4391                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4392                                X1101, BITS4(N,Q,M,0), regM);
4393                break;
4394             case ARMneon_VPMINU:
4395                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4396                                X1010, BITS4(N,Q,M,1), regM);
4397                break;
4398             case ARMneon_VPMINS:
4399                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4400                                X1010, BITS4(N,Q,M,1), regM);
4401                break;
4402             case ARMneon_VPMAXU:
4403                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4404                                X1010, BITS4(N,Q,M,0), regM);
4405                break;
4406             case ARMneon_VPMAXS:
4407                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4408                                X1010, BITS4(N,Q,M,0), regM);
4409                break;
                 /* The remaining cases do not use the size bits; the third
                    nibble is built from D and literal bits only. */
4410             case ARMneon_VADDFP: /* VADD reg, reg, reg */
4411                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4412                                X1101, BITS4(N,Q,M,0), regM);
4413                break;
4414             case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4415                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4416                                X1101, BITS4(N,Q,M,0), regM);
4417                break;
4418             case ARMneon_VABDFP: /* VABD reg, reg, reg */
4419                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4420                                X1101, BITS4(N,Q,M,0), regM);
4421                break;
4422             case ARMneon_VMINF:
4423                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4424                                X1111, BITS4(N,Q,M,0), regM);
4425                break;
4426             case ARMneon_VMAXF:
4427                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4428                                X1111, BITS4(N,Q,M,0), regM);
4429                break;
4430             case ARMneon_VPMINF:
4431                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4432                                X1111, BITS4(N,Q,M,0), regM);
4433                break;
4434             case ARMneon_VPMAXF:
4435                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4436                                X1111, BITS4(N,Q,M,0), regM);
4437                break;
4438             case ARMneon_VRECPS:
4439                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4440                                BITS4(N,Q,M,1), regM);
4441                break;
4442             case ARMneon_VCGTF:
4443                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4444                                BITS4(N,Q,M,0), regM);
4445                break;
4446             case ARMneon_VCGEF:
4447                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4448                                BITS4(N,Q,M,0), regM);
4449                break;
4450             case ARMneon_VCEQF:
4451                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4452                                BITS4(N,Q,M,0), regM);
4453                break;
4454             case ARMneon_VRSQRTS:
4455                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4456                                BITS4(N,Q,M,1), regM);
4457                break;
4458             default:
4459                goto bad;
4460          }
              /* Store the assembled word and advance the output pointer. */
4461          *p++ = insn;
4462          goto done;
4463       }
4464       case ARMin_NShift: {
4465          UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4466          UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4467                        ? (qregEnc(i->ARMin.NShift.dst) << 1)
4468                        : dregEnc(i->ARMin.NShift.dst);
4469          UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4470                        ? (qregEnc(i->ARMin.NShift.argL) << 1)
4471                        : dregEnc(i->ARMin.NShift.argL);
4472          UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4473                        ? (qregEnc(i->ARMin.NShift.argR) << 1)
4474                        : dregEnc(i->ARMin.NShift.argR);
4475          UInt sz1 = i->ARMin.NShift.size >> 1;
4476          UInt sz2 = i->ARMin.NShift.size & 1;
4477          UInt D = regD >> 4;
4478          UInt N = regN >> 4;
4479          UInt M = regM >> 4;
4480          UInt insn;
4481          regD &= 0xF;
4482          regM &= 0xF;
4483          regN &= 0xF;
4484          switch (i->ARMin.NShift.op) {
4485             case ARMneon_VSHL:
4486                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4487                                X0100, BITS4(N,Q,M,0), regM);
4488                break;
4489             case ARMneon_VSAL:
4490                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4491                                X0100, BITS4(N,Q,M,0), regM);
4492                break;
4493             case ARMneon_VQSHL:
4494                insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4495                                X0100, BITS4(N,Q,M,1), regM);
4496                break;
4497             case ARMneon_VQSAL:
4498                insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4499                                X0100, BITS4(N,Q,M,1), regM);
4500                break;
4501             default:
4502                goto bad;
4503          }
4504          *p++ = insn;
4505          goto done;
4506       }
4507       case ARMin_NShl64: {
4508          HReg regDreg = i->ARMin.NShl64.dst;
4509          HReg regMreg = i->ARMin.NShl64.src;
4510          UInt amt     = i->ARMin.NShl64.amt;
4511          vassert(amt >= 1 && amt <= 63);
4512          vassert(hregClass(regDreg) == HRcFlt64);
4513          vassert(hregClass(regMreg) == HRcFlt64);
4514          UInt regD = dregEnc(regDreg);
4515          UInt regM = dregEnc(regMreg);
4516          UInt D    = (regD >> 4) & 1;
4517          UInt Vd   = regD & 0xF;
4518          UInt L    = 1;
4519          UInt Q    = 0; /* always 64-bit */
4520          UInt M    = (regM >> 4) & 1;
4521          UInt Vm   = regM & 0xF;
4522          UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4523                               amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4524          *p++ = insn;
4525          goto done;
4526       }
4527       case ARMin_NeonImm: {
4528          UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4529          UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4530                           dregEnc(i->ARMin.NeonImm.dst);
4531          UInt D = regD >> 4;
4532          UInt imm = i->ARMin.NeonImm.imm->imm8;
4533          UInt tp = i->ARMin.NeonImm.imm->type;
4534          UInt j = imm >> 7;
4535          UInt imm3 = (imm >> 4) & 0x7;
4536          UInt imm4 = imm & 0xF;
4537          UInt cmode, op;
4538          UInt insn;
4539          regD &= 0xF;
4540          if (tp == 9)
4541             op = 1;
4542          else
4543             op = 0;
4544          switch (tp) {
4545             case 0:
4546             case 1:
4547             case 2:
4548             case 3:
4549             case 4:
4550             case 5:
4551                cmode = tp << 1;
4552                break;
4553             case 9:
4554             case 6:
4555                cmode = 14;
4556                break;
4557             case 7:
4558                cmode = 12;
4559                break;
4560             case 8:
4561                cmode = 13;
4562                break;
4563             case 10:
4564                cmode = 15;
4565                break;
4566             default:
4567                vpanic("ARMin_NeonImm");
4568 
4569          }
4570          insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4571                          cmode, BITS4(0,Q,op,1), imm4);
4572          *p++ = insn;
4573          goto done;
4574       }
4575       case ARMin_NCMovQ: {
4576          UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4577          UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4578          UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4579          UInt vM = qM & 0xF;
4580          UInt vD = qD & 0xF;
4581          UInt M  = (qM >> 4) & 1;
4582          UInt D  = (qD >> 4) & 1;
4583          vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4584          /* b!cc here+8: !cc A00 0000 */
4585          UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4586          *p++ = insn;
4587          /* vmov qD, qM */
4588          insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4589                          vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4590          *p++ = insn;
4591          goto done;
4592       }
4593       case ARMin_Add32: {
4594          UInt regD = iregEnc(i->ARMin.Add32.rD);
4595          UInt regN = iregEnc(i->ARMin.Add32.rN);
4596          UInt imm32 = i->ARMin.Add32.imm32;
4597          vassert(regD != regN);
4598          /* MOV regD, imm32 */
4599          p = imm32_to_ireg((UInt *)p, regD, imm32);
4600          /* ADD regD, regN, regD */
4601          UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4602          *p++ = insn;
4603          goto done;
4604       }
4605 
4606       case ARMin_EvCheck: {
4607          /* We generate:
4608                ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4609                subs r12, r12, #1  (A1)
4610                str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4611                bpl  nofail
4612                ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
4613                bx   r12
4614               nofail:
4615          */
4616          UInt* p0 = p;
4617          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4618                                 i->ARMin.EvCheck.amCounter);
4619          *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4620          p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4621                                 i->ARMin.EvCheck.amCounter);
4622          *p++ = 0x5A000001; /* bpl nofail */
4623          p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4624                                 i->ARMin.EvCheck.amFailAddr);
4625          *p++ = 0xE12FFF1C; /* bx r12 */
4626          /* nofail: */
4627 
4628          /* Crosscheck */
4629          vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4630          goto done;
4631       }
4632 
4633       case ARMin_ProfInc: {
4634          /* We generate:
4635               (ctrP is unknown now, so use 0x65556555 in the
4636               expectation that a later call to LibVEX_patchProfCtr
4637               will be used to fill in the immediate fields once the
4638               right value is known.)
4639             movw r12, lo16(0x65556555)
            movt r12, hi16(0x65556555)
4641             ldr  r11, [r12]
4642             adds r11, r11, #1
4643             str  r11, [r12]
4644             ldr  r11, [r12+4]
4645             adc  r11, r11, #0
4646             str  r11, [r12+4]
4647          */
4648          p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4649          *p++ = 0xE59CB000;
4650          *p++ = 0xE29BB001;
4651          *p++ = 0xE58CB000;
4652          *p++ = 0xE59CB004;
4653          *p++ = 0xE2ABB000;
4654          *p++ = 0xE58CB004;
4655          /* Tell the caller .. */
4656          vassert(!(*is_profInc));
4657          *is_profInc = True;
4658          goto done;
4659       }
4660 
4661       /* ... */
4662       default:
4663          goto bad;
4664     }
4665 
4666   bad:
4667    ppARMInstr(i);
4668    vpanic("emit_ARMInstr");
4669    /*NOTREACHED*/
4670 
4671   done:
4672    vassert(((UChar*)p) - &buf[0] <= 32);
4673    return ((UChar*)p) - &buf[0];
4674 }
4675 
4676 
4677 /* How big is an event check?  See case for ARMin_EvCheck in
4678    emit_ARMInstr just above.  That crosschecks what this returns, so
4679    we can tell if we're inconsistent. */
evCheckSzB_ARM(void)4680 Int evCheckSzB_ARM (void)
4681 {
4682    return 24;
4683 }
4684 
4685 
4686 /* NB: what goes on here has to be very closely coordinated with the
4687    emitInstr case for XDirect, above. */
chainXDirect_ARM(VexEndness endness_host,void * place_to_chain,const void * disp_cp_chain_me_EXPECTED,const void * place_to_jump_to)4688 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4689                                  void* place_to_chain,
4690                                  const void* disp_cp_chain_me_EXPECTED,
4691                                  const void* place_to_jump_to )
4692 {
4693    vassert(endness_host == VexEndnessLE);
4694 
4695    /* What we're expecting to see is:
4696         movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
4697         movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
4698         blx  r12
4699       viz
4700         <8 bytes generated by imm32_to_ireg_EXACTLY2>
4701         E1 2F FF 3C
4702    */
4703    UInt* p = (UInt*)place_to_chain;
4704    vassert(0 == (3 & (HWord)p));
4705    vassert(is_imm32_to_ireg_EXACTLY2(
4706               p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4707    vassert(p[2] == 0xE12FFF3C);
4708    /* And what we want to change it to is either:
4709         (general case)
4710           movw r12, lo16(place_to_jump_to)
4711           movt r12, hi16(place_to_jump_to)
4712           bx   r12
4713         viz
4714           <8 bytes generated by imm32_to_ireg_EXACTLY2>
4715           E1 2F FF 1C
4716       ---OR---
4717         in the case where the displacement falls within 26 bits
4718           b disp24; undef; undef
4719         viz
4720           EA <3 bytes == disp24>
4721           FF 00 00 00
4722           FF 00 00 00
4723 
4724       In both cases the replacement has the same length as the original.
4725       To remain sane & verifiable,
4726       (1) limit the displacement for the short form to
4727           (say) +/- 30 million, so as to avoid wraparound
4728           off-by-ones
4729       (2) even if the short form is applicable, once every (say)
4730           1024 times use the long form anyway, so as to maintain
4731           verifiability
4732    */
4733 
4734    /* This is the delta we need to put into a B insn.  It's relative
4735       to the start of the next-but-one insn, hence the -8.  */
4736    Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4737    Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4738    vassert(0 == (delta & (Long)3));
4739 
4740    static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4741    if (shortOK) {
4742       shortCTR++; // thread safety bleh
4743       if (0 == (shortCTR & 0x3FF)) {
4744          shortOK = False;
4745          if (0)
4746             vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4747                        "using long form\n", shortCTR);
4748       }
4749    }
4750 
4751    /* And make the modifications. */
4752    if (shortOK) {
4753       Int simm24 = (Int)(delta >> 2);
4754       vassert(simm24 == ((simm24 << 8) >> 8));
4755       p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4756       p[1] = 0xFF000000;
4757       p[2] = 0xFF000000;
4758    } else {
4759       (void)imm32_to_ireg_EXACTLY2(
4760                p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4761       p[2] = 0xE12FFF1C;
4762    }
4763 
4764    VexInvalRange vir = {(HWord)p, 12};
4765    return vir;
4766 }
4767 
4768 
4769 /* NB: what goes on here has to be very closely coordinated with the
4770    emitInstr case for XDirect, above. */
unchainXDirect_ARM(VexEndness endness_host,void * place_to_unchain,const void * place_to_jump_to_EXPECTED,const void * disp_cp_chain_me)4771 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4772                                    void* place_to_unchain,
4773                                    const void* place_to_jump_to_EXPECTED,
4774                                    const void* disp_cp_chain_me )
4775 {
4776    vassert(endness_host == VexEndnessLE);
4777 
4778    /* What we're expecting to see is:
4779         (general case)
4780           movw r12, lo16(place_to_jump_to_EXPECTED)
4781           movt r12, lo16(place_to_jump_to_EXPECTED)
4782           bx   r12
4783         viz
4784           <8 bytes generated by imm32_to_ireg_EXACTLY2>
4785           E1 2F FF 1C
4786       ---OR---
4787         in the case where the displacement falls within 26 bits
4788           b disp24; undef; undef
4789         viz
4790           EA <3 bytes == disp24>
4791           FF 00 00 00
4792           FF 00 00 00
4793    */
4794    UInt* p = (UInt*)place_to_unchain;
4795    vassert(0 == (3 & (HWord)p));
4796 
4797    Bool valid = False;
4798    if (is_imm32_to_ireg_EXACTLY2(
4799           p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
4800        && p[2] == 0xE12FFF1C) {
4801       valid = True; /* it's the long form */
4802       if (0)
4803          vex_printf("QQQ unchainXDirect_ARM: found long form\n");
4804    } else
4805    if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
4806       /* It's the short form.  Check the displacement is right. */
4807       Int simm24 = p[0] & 0x00FFFFFF;
4808       simm24 <<= 8; simm24 >>= 8;
4809       if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
4810          valid = True;
4811          if (0)
4812             vex_printf("QQQ unchainXDirect_ARM: found short form\n");
4813       }
4814    }
4815    vassert(valid);
4816 
4817    /* And what we want to change it to is:
4818         movw r12, lo16(disp_cp_chain_me)
4819         movt r12, hi16(disp_cp_chain_me)
4820         blx  r12
4821       viz
4822         <8 bytes generated by imm32_to_ireg_EXACTLY2>
4823         E1 2F FF 3C
4824    */
4825    (void)imm32_to_ireg_EXACTLY2(
4826             p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
4827    p[2] = 0xE12FFF3C;
4828    VexInvalRange vir = {(HWord)p, 12};
4829    return vir;
4830 }
4831 
4832 
4833 /* Patch the counter address into a profile inc point, as previously
4834    created by the ARMin_ProfInc case for emit_ARMInstr. */
patchProfInc_ARM(VexEndness endness_host,void * place_to_patch,const ULong * location_of_counter)4835 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
4836                                  void*  place_to_patch,
4837                                  const ULong* location_of_counter )
4838 {
4839    vassert(endness_host == VexEndnessLE);
4840    vassert(sizeof(ULong*) == 4);
4841    UInt* p = (UInt*)place_to_patch;
4842    vassert(0 == (3 & (HWord)p));
4843    vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
4844    vassert(p[2] == 0xE59CB000);
4845    vassert(p[3] == 0xE29BB001);
4846    vassert(p[4] == 0xE58CB000);
4847    vassert(p[5] == 0xE59CB004);
4848    vassert(p[6] == 0xE2ABB000);
4849    vassert(p[7] == 0xE58CB004);
4850    imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
4851    VexInvalRange vir = {(HWord)p, 8};
4852    return vir;
4853 }
4854 
4855 
4856 #undef BITS4
4857 #undef X0000
4858 #undef X0001
4859 #undef X0010
4860 #undef X0011
4861 #undef X0100
4862 #undef X0101
4863 #undef X0110
4864 #undef X0111
4865 #undef X1000
4866 #undef X1001
4867 #undef X1010
4868 #undef X1011
4869 #undef X1100
4870 #undef X1101
4871 #undef X1110
4872 #undef X1111
4873 #undef XXXXX___
4874 #undef XXXXXX__
4875 #undef XXX___XX
4876 #undef XXXXX__X
4877 #undef XXXXXXXX
4878 #undef XX______
4879 
4880 /*---------------------------------------------------------------*/
4881 /*--- end                                     host_arm_defs.c ---*/
4882 /*---------------------------------------------------------------*/
4883