
/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   NEON support is
   Copyright (C) 2010-2013 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
              and Kirill Batuzov <batuzovk@ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm_defs.h"

UInt arm_hwcaps = 0;


/* --------- Registers. --------- */

/* The usual HReg abstraction.
   There are 16 general purpose regs.
*/

void ppHRegARM ( HReg reg )  {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("r%d", r);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcFlt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("s%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM");
   }
}

HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }

void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
{
   Int i = 0;
   *nregs = 26;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   // callee-saved ones are listed first, since we prefer them
   // if they're available
   (*arr)[i++] = hregARM_R4();
   (*arr)[i++] = hregARM_R5();
   (*arr)[i++] = hregARM_R6();
   (*arr)[i++] = hregARM_R7();
   (*arr)[i++] = hregARM_R10();
   (*arr)[i++] = hregARM_R11();
   // otherwise we'll have to slum it out with caller-saved ones
   (*arr)[i++] = hregARM_R0();
   (*arr)[i++] = hregARM_R1();
   (*arr)[i++] = hregARM_R2();
   (*arr)[i++] = hregARM_R3();
   (*arr)[i++] = hregARM_R9();
   // FP registers.  Note: these are all callee-saved.  Yay!
   // Hence we don't need to mention them as trashed in
   // getHRegUsage for ARMInstr_Call.
   (*arr)[i++] = hregARM_D8();
   (*arr)[i++] = hregARM_D9();
   (*arr)[i++] = hregARM_D10();
   (*arr)[i++] = hregARM_D11();
   (*arr)[i++] = hregARM_D12();
   (*arr)[i++] = hregARM_S26();
   (*arr)[i++] = hregARM_S27();
   (*arr)[i++] = hregARM_S28();
   (*arr)[i++] = hregARM_S29();
   (*arr)[i++] = hregARM_S30();

   (*arr)[i++] = hregARM_Q8();
   (*arr)[i++] = hregARM_Q9();
   (*arr)[i++] = hregARM_Q10();
   (*arr)[i++] = hregARM_Q11();
   (*arr)[i++] = hregARM_Q12();

   //(*arr)[i++] = hregARM_Q13();
   //(*arr)[i++] = hregARM_Q14();
   //(*arr)[i++] = hregARM_Q15();

   // unavail: r8 as GSP
   // r12 is used as a spill/reload temporary
   // r13 as SP
   // r14 as LR
   // r15 as PC
   //
   // All in all, we have 11 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
   // and r12 dedicated as a spill temporary.
   // 13 14 and 15 are not under the allocator's control.
   //
   // Hence for the allocatable registers we have:
   //
   // callee-saved: 4 5 6 7 (8) 9 10 11
   // caller-saved: 0 1 2 3
   // Note 9 is ambiguous: the base EABI does not give a callee-/
   // caller-saved designation for it, but the Linux instantiation
   // of the ABI specifies it as callee-saved.
   //
   // If the set of available registers changes or if the callee-/
   // caller-saved status changes, be sure to re-check/sync the
   // definition of getHRegUsage for ARMInstr_Call too.
   vassert(i == *nregs);
}



/* --------- Condition codes, ARM encoding. --------- */

const HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
       case ARMcc_EQ:  return "eq";
       case ARMcc_NE:  return "ne";
       case ARMcc_HS:  return "hs";
       case ARMcc_LO:  return "lo";
       case ARMcc_MI:  return "mi";
       case ARMcc_PL:  return "pl";
       case ARMcc_VS:  return "vs";
       case ARMcc_VC:  return "vc";
       case ARMcc_HI:  return "hi";
       case ARMcc_LS:  return "ls";
       case ARMcc_GE:  return "ge";
       case ARMcc_LT:  return "lt";
       case ARMcc_GT:  return "gt";
       case ARMcc_LE:  return "le";
       case ARMcc_AL:  return "al"; // default
       case ARMcc_NV:  return "nv";
       default: vpanic("showARMCondCode");
   }
}


/* --------- Mem AModes: Addressing Mode 1 --------- */

ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RI;
   am->ARMam1.RI.reg    = reg;
   am->ARMam1.RI.simm13 = simm13;
   vassert(-4095 <= simm13 && simm13 <= 4095);
   return am;
}
ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RRS;
   am->ARMam1.RRS.base  = base;
   am->ARMam1.RRS.index = index;
   am->ARMam1.RRS.shift = shift;
   vassert(0 <= shift && shift <= 3);
   return am;
}

void ppARMAMode1 ( ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         vex_printf("%d(", am->ARMam1.RI.simm13);
         ppHRegARM(am->ARMam1.RI.reg);
         vex_printf(")");
         break;
      case ARMam1_RRS:
         vex_printf("(");
         ppHRegARM(am->ARMam1.RRS.base);
         vex_printf(",");
         ppHRegARM(am->ARMam1.RRS.index);
         vex_printf(",%u)", am->ARMam1.RRS.shift);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode1");
   }
}

static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode1");
   }
}


/* --------- Mem AModes: Addressing Mode 2 --------- */

ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RI;
   am->ARMam2.RI.reg   = reg;
   am->ARMam2.RI.simm9 = simm9;
   vassert(-255 <= simm9 && simm9 <= 255);
   return am;
}
ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RR;
   am->ARMam2.RR.base  = base;
   am->ARMam2.RR.index = index;
   return am;
}

void ppARMAMode2 ( ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         vex_printf("%d(", am->ARMam2.RI.simm9);
         ppHRegARM(am->ARMam2.RI.reg);
         vex_printf(")");
         break;
      case ARMam2_RR:
         vex_printf("(");
         ppHRegARM(am->ARMam2.RR.base);
         vex_printf(",");
         ppHRegARM(am->ARMam2.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode2");
   }
}

static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode2");
   }
}


/* --------- Mem AModes: Addressing Mode VFP --------- */

ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
   ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
   vassert(simm11 >= -1020 && simm11 <= 1020);
   vassert(0 == (simm11 & 3));
   am->reg    = reg;
   am->simm11 = simm11;
   return am;
}

void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}


/* --------- Mem AModes: Addressing Mode Neon ------- */

ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_RR;
   am->ARMamN.RR.rN = rN;
   am->ARMamN.RR.rM = rM;
   return am;
}

ARMAModeN *mkARMAModeN_R ( HReg rN ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_R;
   am->ARMamN.R.rN = rN;
   return am;
}

static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
   } else {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
   }
}

static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
   } else {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
   }
}

void ppARMAModeN ( ARMAModeN* am ) {
   vex_printf("[");
   if (am->tag == ARMamN_R) {
      ppHRegARM(am->ARMamN.R.rN);
   } else {
      ppHRegARM(am->ARMamN.RR.rN);
   }
   vex_printf("]");
   if (am->tag == ARMamN_RR) {
      vex_printf(", ");
      ppHRegARM(am->ARMamN.RR.rM);
   }
}


/* --------- Reg or imm-8x4 operands --------- */

static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}
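
/* Editor's note, a worked example rather than original text: the
   imm-8x4 scheme below denotes imm8 rotated right by 2*imm4 bit
   positions, exactly the ARM "immediate operand" encoding.  So
      ARMRI84_I84(0x05, 0x0C) denotes ROR32(0x05, 24) == 0x00000500
      ARMRI84_I84(0xFF, 0x04) denotes ROR32(0xFF, 8)  == 0xFF000000
   and ppARMRI84 recomputes the denoted value the same way. */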

ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
   ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag              = ARMri84_I84;
   ri84->ARMri84.I84.imm8 = imm8;
   ri84->ARMri84.I84.imm4 = imm4;
   vassert(imm8 >= 0 && imm8 <= 255);
   vassert(imm4 >= 0 && imm4 <= 15);
   return ri84;
}
ARMRI84* ARMRI84_R ( HReg reg ) {
   ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag           = ARMri84_R;
   ri84->ARMri84.R.reg = reg;
   return ri84;
}

void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI84");
   }
}

static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI84");
   }
}


/* --------- Reg or imm5 operands --------- */

ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
   ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag            = ARMri5_I5;
   ri5->ARMri5.I5.imm5 = imm5;
   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
   return ri5;
}
ARMRI5* ARMRI5_R ( HReg reg ) {
   ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag          = ARMri5_R;
   ri5->ARMri5.R.reg = reg;
   return ri5;
}

void ppARMRI5 ( ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         vex_printf("%u", ri5->ARMri5.I5.imm5);
         break;
      case ARMri5_R:
         ppHRegARM(ri5->ARMri5.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI5");
   }
}

static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI5");
   }
}

/* -------- Neon Immediate operand --------- */

ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
   ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
   i->type = type;
   i->imm8 = imm8;
   return i;
}

ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      case 3:
         x = x << 8; /* fallthrough */
      case 2:
         x = x << 8; /* fallthrough */
      case 1:
         x = x << 8; /* fallthrough */
      case 0:
         return (x << 32) | x;
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      case 8:
         x = (x << 8) | 0xFF;
         /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}

ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
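
/* Editor's note, a worked example rather than original text: types
   0..3 place imm8 in byte 0..3 of each 32-bit lane; types 4..6 build
   repeating 16-bit patterns; types 7..8 are the "imm8 followed by
   0xFF bytes" forms; type 9 expands each bit of imm8 into a whole
   byte of the 64-bit result; type 10 is the VFP-style float
   immediate.  For instance Imm64_to_ARMNImm(0xFFFFFFFFULL) takes the
   type-9 branch (its two 32-bit halves differ) and yields
   imm8 == 0x0F, since replicating those four low bits byte-wise
   regenerates 0x00000000FFFFFFFF. */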

void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}

/* -- Register or scalar operand --- */

ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
{
   ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
   p->tag = tag;
   p->reg = reg;
   p->index = index;
   return p;
}

void ppARMNRS(ARMNRS *p)
{
   ppHRegARM(p->reg);
   if (p->tag == ARMNRS_Scalar) {
      vex_printf("[%d]", p->index);
   }
}

/* --------- Instructions. --------- */

const HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}

const HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}

const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}

const HChar* showARMMulOp ( ARMMulOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}

const HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}

const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}

const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND: return "vand";
      case ARMneon_VORR: return "vorr";
      case ARMneon_VXOR: return "veor";
      case ARMneon_VADD: return "vadd";
      case ARMneon_VRHADDS: return "vrhadd";
      case ARMneon_VRHADDU: return "vrhadd";
      case ARMneon_VADDFP: return "vadd";
      case ARMneon_VPADDFP: return "vpadd";
      case ARMneon_VABDFP: return "vabd";
      case ARMneon_VSUB: return "vsub";
      case ARMneon_VSUBFP: return "vsub";
      case ARMneon_VMINU: return "vmin";
      case ARMneon_VMINS: return "vmin";
      case ARMneon_VMINF: return "vmin";
      case ARMneon_VMAXU: return "vmax";
      case ARMneon_VMAXS: return "vmax";
      case ARMneon_VMAXF: return "vmax";
      case ARMneon_VQADDU: return "vqadd";
      case ARMneon_VQADDS: return "vqadd";
      case ARMneon_VQSUBU: return "vqsub";
      case ARMneon_VQSUBS: return "vqsub";
      case ARMneon_VCGTU:  return "vcgt";
      case ARMneon_VCGTS:  return "vcgt";
      case ARMneon_VCGTF:  return "vcgt";
      case ARMneon_VCGEF:  return "vcge";
      case ARMneon_VCGEU:  return "vcge";
      case ARMneon_VCGES:  return "vcge";
      case ARMneon_VCEQ:  return "vceq";
      case ARMneon_VCEQF:  return "vceq";
      case ARMneon_VPADD:   return "vpadd";
      case ARMneon_VPMINU:   return "vpmin";
      case ARMneon_VPMINS:   return "vpmin";
      case ARMneon_VPMINF:   return "vpmin";
      case ARMneon_VPMAXU:   return "vpmax";
      case ARMneon_VPMAXS:   return "vpmax";
      case ARMneon_VPMAXF:   return "vpmax";
      case ARMneon_VEXT:   return "vext";
      case ARMneon_VMUL:   return "vmuli";
      case ARMneon_VMULLU:   return "vmull";
      case ARMneon_VMULLS:   return "vmull";
      case ARMneon_VMULP:  return "vmul";
      case ARMneon_VMULFP:  return "vmul";
      case ARMneon_VMULLP:  return "vmull";
      case ARMneon_VQDMULH: return "vqdmulh";
      case ARMneon_VQRDMULH: return "vqrdmulh";
      case ARMneon_VQDMULL: return "vqdmull";
      case ARMneon_VTBL: return "vtbl";
      case ARMneon_VRECPS: return "vrecps";
      case ARMneon_VRSQRTS: return "vrsqrts";
      case ARMneon_INVALID: return "??invalid??";
      /* ... */
      default: vpanic("showARMNeonBinOp");
   }
}

const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}

const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY: return "vmov";
      case ARMneon_COPYLS: return "vmov";
      case ARMneon_COPYLU: return "vmov";
      case ARMneon_COPYN: return "vmov";
      case ARMneon_COPYQNSS: return "vqmovn";
      case ARMneon_COPYQNUS: return "vqmovun";
      case ARMneon_COPYQNUU: return "vqmovn";
      case ARMneon_NOT: return "vmvn";
      case ARMneon_EQZ: return "vceq";
      case ARMneon_CNT: return "vcnt";
      case ARMneon_CLS: return "vcls";
      case ARMneon_CLZ: return "vclz";
      case ARMneon_DUP: return "vdup";
      case ARMneon_PADDLS: return "vpaddl";
      case ARMneon_PADDLU: return "vpaddl";
      case ARMneon_VQSHLNSS: return "vqshl";
      case ARMneon_VQSHLNUU: return "vqshl";
      case ARMneon_VQSHLNUS: return "vqshlu";
      case ARMneon_REV16: return "vrev16";
      case ARMneon_REV32: return "vrev32";
      case ARMneon_REV64: return "vrev64";
      case ARMneon_VCVTFtoU: return "vcvt";
      case ARMneon_VCVTFtoS: return "vcvt";
      case ARMneon_VCVTUtoF: return "vcvt";
      case ARMneon_VCVTStoF: return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16: return "vcvt";
      case ARMneon_VCVTF16toF32: return "vcvt";
      case ARMneon_VRECIP: return "vrecip";
      case ARMneon_VRECIPF: return "vrecipf";
      case ARMneon_VNEGF: return "vneg";
      case ARMneon_ABS: return "vabs";
      case ARMneon_VABSFP: return "vabsfp";
      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
      case ARMneon_VRSQRTE: return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}

const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU: return ".u32.f32";
      case ARMneon_VCVTFtoS: return ".s32.f32";
      case ARMneon_VCVTUtoF: return ".f32.u32";
      case ARMneon_VCVTStoF: return ".f32.s32";
      case ARMneon_VCVTF16toF32: return ".f32.f16";
      case ARMneon_VCVTF32toF16: return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}

const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM: return "vmov";
      case ARMneon_GETELEMU: return "vmov";
      case ARMneon_GETELEMS: return "vmov";
      case ARMneon_VDUP: return "vdup";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM:
      case ARMneon_VDUP:
         return ".i";
      case ARMneon_GETELEMS:
         return ".s";
      case ARMneon_GETELEMU:
         return ".u";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL: return "vshl";
      case ARMneon_VSAL: return "vshl";
      case ARMneon_VQSHL: return "vqshl";
      case ARMneon_VQSAL: return "vqshl";
      /* ... */
      default: vpanic("showARMNeonShiftOp");
   }
}

const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL:
      case ARMneon_VQSHL:
         return ".u";
      case ARMneon_VSAL:
      case ARMneon_VQSAL:
         return ".s";
      /* ... */
      default: vpanic("showARMNeonShiftOpDataType");
   }
}

const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN: return "vtrn";
      case ARMneon_ZIP: return "vzip";
      case ARMneon_UZP: return "vuzp";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN:
      case ARMneon_ZIP:
      case ARMneon_UZP:
         return "i";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

static const HChar* showARMNeonDataSize_wrk ( UInt size )
{
   switch (size) {
      case 0: return "8";
      case 1: return "16";
      case 2: return "32";
      case 3: return "64";
      default: vpanic("showARMNeonDataSize");
   }
}

static const HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}
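
/* Editor's note, not original text: for the VQSHLN* cases above, the
   'size' field carries the instruction's shift-immediate encoding.
   The highest set bit among 0x40/0x20/0x10/0x08 selects the lane
   width, and the bits below that marker encode the shift amount;
   e.g. size == 0x23 prints as 32-bit lanes (bit 0x20 set), with the
   low bits denoting a shift of 3. */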
1099 
ARMInstr_Alu(ARMAluOp op,HReg dst,HReg argL,ARMRI84 * argR)1100 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1101                          HReg dst, HReg argL, ARMRI84* argR ) {
1102    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1103    i->tag            = ARMin_Alu;
1104    i->ARMin.Alu.op   = op;
1105    i->ARMin.Alu.dst  = dst;
1106    i->ARMin.Alu.argL = argL;
1107    i->ARMin.Alu.argR = argR;
1108    return i;
1109 }
ARMInstr_Shift(ARMShiftOp op,HReg dst,HReg argL,ARMRI5 * argR)1110 ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
1111                             HReg dst, HReg argL, ARMRI5* argR ) {
1112    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1113    i->tag              = ARMin_Shift;
1114    i->ARMin.Shift.op   = op;
1115    i->ARMin.Shift.dst  = dst;
1116    i->ARMin.Shift.argL = argL;
1117    i->ARMin.Shift.argR = argR;
1118    return i;
1119 }
ARMInstr_Unary(ARMUnaryOp op,HReg dst,HReg src)1120 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1121    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1122    i->tag             = ARMin_Unary;
1123    i->ARMin.Unary.op  = op;
1124    i->ARMin.Unary.dst = dst;
1125    i->ARMin.Unary.src = src;
1126    return i;
1127 }
ARMInstr_CmpOrTst(Bool isCmp,HReg argL,ARMRI84 * argR)1128 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1129    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1130    i->tag                  = ARMin_CmpOrTst;
1131    i->ARMin.CmpOrTst.isCmp = isCmp;
1132    i->ARMin.CmpOrTst.argL  = argL;
1133    i->ARMin.CmpOrTst.argR  = argR;
1134    return i;
1135 }
ARMInstr_Mov(HReg dst,ARMRI84 * src)1136 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1137    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1138    i->tag           = ARMin_Mov;
1139    i->ARMin.Mov.dst = dst;
1140    i->ARMin.Mov.src = src;
1141    return i;
1142 }
ARMInstr_Imm32(HReg dst,UInt imm32)1143 ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
1144    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1145    i->tag               = ARMin_Imm32;
1146    i->ARMin.Imm32.dst   = dst;
1147    i->ARMin.Imm32.imm32 = imm32;
1148    return i;
1149 }
ARMInstr_LdSt32(ARMCondCode cc,Bool isLoad,HReg rD,ARMAMode1 * amode)1150 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1151                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
1152    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1153    i->tag                 = ARMin_LdSt32;
1154    i->ARMin.LdSt32.cc     = cc;
1155    i->ARMin.LdSt32.isLoad = isLoad;
1156    i->ARMin.LdSt32.rD     = rD;
1157    i->ARMin.LdSt32.amode  = amode;
1158    vassert(cc != ARMcc_NV);
1159    return i;
1160 }
ARMInstr_LdSt16(ARMCondCode cc,Bool isLoad,Bool signedLoad,HReg rD,ARMAMode2 * amode)1161 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1162                             Bool isLoad, Bool signedLoad,
1163                             HReg rD, ARMAMode2* amode ) {
1164    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1165    i->tag                     = ARMin_LdSt16;
1166    i->ARMin.LdSt16.cc         = cc;
1167    i->ARMin.LdSt16.isLoad     = isLoad;
1168    i->ARMin.LdSt16.signedLoad = signedLoad;
1169    i->ARMin.LdSt16.rD         = rD;
1170    i->ARMin.LdSt16.amode      = amode;
1171    vassert(cc != ARMcc_NV);
1172    return i;
1173 }
ARMInstr_LdSt8U(ARMCondCode cc,Bool isLoad,HReg rD,ARMAMode1 * amode)1174 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1175                             Bool isLoad, HReg rD, ARMAMode1* amode ) {
1176    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1177    i->tag                 = ARMin_LdSt8U;
1178    i->ARMin.LdSt8U.cc     = cc;
1179    i->ARMin.LdSt8U.isLoad = isLoad;
1180    i->ARMin.LdSt8U.rD     = rD;
1181    i->ARMin.LdSt8U.amode  = amode;
1182    vassert(cc != ARMcc_NV);
1183    return i;
1184 }
ARMInstr_Ld8S(ARMCondCode cc,HReg rD,ARMAMode2 * amode)1185 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1186    ARMInstr* i         = LibVEX_Alloc(sizeof(ARMInstr));
1187    i->tag              = ARMin_Ld8S;
1188    i->ARMin.Ld8S.cc    = cc;
1189    i->ARMin.Ld8S.rD    = rD;
1190    i->ARMin.Ld8S.amode = amode;
1191    vassert(cc != ARMcc_NV);
1192    return i;
1193 }
ARMInstr_XDirect(Addr32 dstGA,ARMAMode1 * amR15T,ARMCondCode cond,Bool toFastEP)1194 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1195                              ARMCondCode cond, Bool toFastEP ) {
1196    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
1197    i->tag                    = ARMin_XDirect;
1198    i->ARMin.XDirect.dstGA    = dstGA;
1199    i->ARMin.XDirect.amR15T   = amR15T;
1200    i->ARMin.XDirect.cond     = cond;
1201    i->ARMin.XDirect.toFastEP = toFastEP;
1202    return i;
1203 }
ARMInstr_XIndir(HReg dstGA,ARMAMode1 * amR15T,ARMCondCode cond)1204 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1205                             ARMCondCode cond ) {
1206    ARMInstr* i            = LibVEX_Alloc(sizeof(ARMInstr));
1207    i->tag                 = ARMin_XIndir;
1208    i->ARMin.XIndir.dstGA  = dstGA;
1209    i->ARMin.XIndir.amR15T = amR15T;
1210    i->ARMin.XIndir.cond   = cond;
1211    return i;
1212 }
ARMInstr_XAssisted(HReg dstGA,ARMAMode1 * amR15T,ARMCondCode cond,IRJumpKind jk)1213 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1214                                ARMCondCode cond, IRJumpKind jk ) {
1215    ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
1216    i->tag                    = ARMin_XAssisted;
1217    i->ARMin.XAssisted.dstGA  = dstGA;
1218    i->ARMin.XAssisted.amR15T = amR15T;
1219    i->ARMin.XAssisted.cond   = cond;
1220    i->ARMin.XAssisted.jk     = jk;
1221    return i;
1222 }
ARMInstr_CMov(ARMCondCode cond,HReg dst,ARMRI84 * src)1223 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1224    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1225    i->tag             = ARMin_CMov;
1226    i->ARMin.CMov.cond = cond;
1227    i->ARMin.CMov.dst  = dst;
1228    i->ARMin.CMov.src  = src;
1229    vassert(cond != ARMcc_AL);
1230    return i;
1231 }
ARMInstr_Call(ARMCondCode cond,HWord target,Int nArgRegs,RetLoc rloc)1232 ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs,
1233                           RetLoc rloc ) {
1234    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1235    i->tag                 = ARMin_Call;
1236    i->ARMin.Call.cond     = cond;
1237    i->ARMin.Call.target   = target;
1238    i->ARMin.Call.nArgRegs = nArgRegs;
1239    i->ARMin.Call.rloc     = rloc;
1240    vassert(is_sane_RetLoc(rloc));
1241    return i;
1242 }
ARMInstr_Mul(ARMMulOp op)1243 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1244    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1245    i->tag          = ARMin_Mul;
1246    i->ARMin.Mul.op = op;
1247    return i;
1248 }
ARMInstr_LdrEX(Int szB)1249 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1250    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1251    i->tag             = ARMin_LdrEX;
1252    i->ARMin.LdrEX.szB = szB;
1253    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1254    return i;
1255 }
ARMInstr_StrEX(Int szB)1256 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1257    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1258    i->tag             = ARMin_StrEX;
1259    i->ARMin.StrEX.szB = szB;
1260    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1261    return i;
1262 }
ARMInstr_VLdStD(Bool isLoad,HReg dD,ARMAModeV * am)1263 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1264    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1265    i->tag                 = ARMin_VLdStD;
1266    i->ARMin.VLdStD.isLoad = isLoad;
1267    i->ARMin.VLdStD.dD     = dD;
1268    i->ARMin.VLdStD.amode  = am;
1269    return i;
1270 }
ARMInstr_VLdStS(Bool isLoad,HReg fD,ARMAModeV * am)1271 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1272    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1273    i->tag                 = ARMin_VLdStS;
1274    i->ARMin.VLdStS.isLoad = isLoad;
1275    i->ARMin.VLdStS.fD     = fD;
1276    i->ARMin.VLdStS.amode  = am;
1277    return i;
1278 }
ARMInstr_VAluD(ARMVfpOp op,HReg dst,HReg argL,HReg argR)1279 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1280    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1281    i->tag              = ARMin_VAluD;
1282    i->ARMin.VAluD.op   = op;
1283    i->ARMin.VAluD.dst  = dst;
1284    i->ARMin.VAluD.argL = argL;
1285    i->ARMin.VAluD.argR = argR;
1286    return i;
1287 }
ARMInstr_VAluS(ARMVfpOp op,HReg dst,HReg argL,HReg argR)1288 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1289    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1290    i->tag              = ARMin_VAluS;
1291    i->ARMin.VAluS.op   = op;
1292    i->ARMin.VAluS.dst  = dst;
1293    i->ARMin.VAluS.argL = argL;
1294    i->ARMin.VAluS.argR = argR;
1295    return i;
1296 }
ARMInstr_VUnaryD(ARMVfpUnaryOp op,HReg dst,HReg src)1297 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1298    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1299    i->tag               = ARMin_VUnaryD;
1300    i->ARMin.VUnaryD.op  = op;
1301    i->ARMin.VUnaryD.dst = dst;
1302    i->ARMin.VUnaryD.src = src;
1303    return i;
1304 }
ARMInstr_VUnaryS(ARMVfpUnaryOp op,HReg dst,HReg src)1305 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1306    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1307    i->tag               = ARMin_VUnaryS;
1308    i->ARMin.VUnaryS.op  = op;
1309    i->ARMin.VUnaryS.dst = dst;
1310    i->ARMin.VUnaryS.src = src;
1311    return i;
1312 }
ARMInstr_VCmpD(HReg argL,HReg argR)1313 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1314    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1315    i->tag              = ARMin_VCmpD;
1316    i->ARMin.VCmpD.argL = argL;
1317    i->ARMin.VCmpD.argR = argR;
1318    return i;
1319 }
ARMInstr_VCMovD(ARMCondCode cond,HReg dst,HReg src)1320 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1321    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1322    i->tag               = ARMin_VCMovD;
1323    i->ARMin.VCMovD.cond = cond;
1324    i->ARMin.VCMovD.dst  = dst;
1325    i->ARMin.VCMovD.src  = src;
1326    vassert(cond != ARMcc_AL);
1327    return i;
1328 }
ARMInstr_VCMovS(ARMCondCode cond,HReg dst,HReg src)1329 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1330    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1331    i->tag               = ARMin_VCMovS;
1332    i->ARMin.VCMovS.cond = cond;
1333    i->ARMin.VCMovS.dst  = dst;
1334    i->ARMin.VCMovS.src  = src;
1335    vassert(cond != ARMcc_AL);
1336    return i;
1337 }
ARMInstr_VCvtSD(Bool sToD,HReg dst,HReg src)1338 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1339    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1340    i->tag               = ARMin_VCvtSD;
1341    i->ARMin.VCvtSD.sToD = sToD;
1342    i->ARMin.VCvtSD.dst  = dst;
1343    i->ARMin.VCvtSD.src  = src;
1344    return i;
1345 }
ARMInstr_VXferD(Bool toD,HReg dD,HReg rHi,HReg rLo)1346 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1347    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1348    i->tag              = ARMin_VXferD;
1349    i->ARMin.VXferD.toD = toD;
1350    i->ARMin.VXferD.dD  = dD;
1351    i->ARMin.VXferD.rHi = rHi;
1352    i->ARMin.VXferD.rLo = rLo;
1353    return i;
1354 }
ARMInstr_VXferS(Bool toS,HReg fD,HReg rLo)1355 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1356    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1357    i->tag              = ARMin_VXferS;
1358    i->ARMin.VXferS.toS = toS;
1359    i->ARMin.VXferS.fD  = fD;
1360    i->ARMin.VXferS.rLo = rLo;
1361    return i;
1362 }
ARMInstr_VCvtID(Bool iToD,Bool syned,HReg dst,HReg src)1363 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1364                             HReg dst, HReg src ) {
1365    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1366    i->tag                = ARMin_VCvtID;
1367    i->ARMin.VCvtID.iToD  = iToD;
1368    i->ARMin.VCvtID.syned = syned;
1369    i->ARMin.VCvtID.dst   = dst;
1370    i->ARMin.VCvtID.src   = src;
1371    return i;
1372 }
ARMInstr_FPSCR(Bool toFPSCR,HReg iReg)1373 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1374    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1375    i->tag                 = ARMin_FPSCR;
1376    i->ARMin.FPSCR.toFPSCR = toFPSCR;
1377    i->ARMin.FPSCR.iReg    = iReg;
1378    return i;
1379 }
ARMInstr_MFence(void)1380 ARMInstr* ARMInstr_MFence ( void ) {
1381    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1382    i->tag      = ARMin_MFence;
1383    return i;
1384 }
ARMInstr_CLREX(void)1385 ARMInstr* ARMInstr_CLREX( void ) {
1386    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1387    i->tag      = ARMin_CLREX;
1388    return i;
1389 }
1390 
ARMInstr_NLdStQ(Bool isLoad,HReg dQ,ARMAModeN * amode)1391 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1392    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1393    i->tag                  = ARMin_NLdStQ;
1394    i->ARMin.NLdStQ.isLoad  = isLoad;
1395    i->ARMin.NLdStQ.dQ      = dQ;
1396    i->ARMin.NLdStQ.amode   = amode;
1397    return i;
1398 }
1399 
ARMInstr_NLdStD(Bool isLoad,HReg dD,ARMAModeN * amode)1400 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1401    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1402    i->tag                  = ARMin_NLdStD;
1403    i->ARMin.NLdStD.isLoad  = isLoad;
1404    i->ARMin.NLdStD.dD      = dD;
1405    i->ARMin.NLdStD.amode   = amode;
1406    return i;
1407 }
1408 
ARMInstr_NUnary(ARMNeonUnOp op,HReg dQ,HReg nQ,UInt size,Bool Q)1409 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1410                             UInt size, Bool Q ) {
1411    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1412    i->tag                = ARMin_NUnary;
1413    i->ARMin.NUnary.op   = op;
1414    i->ARMin.NUnary.src  = nQ;
1415    i->ARMin.NUnary.dst  = dQ;
1416    i->ARMin.NUnary.size = size;
1417    i->ARMin.NUnary.Q    = Q;
1418    return i;
1419 }
1420 
ARMInstr_NUnaryS(ARMNeonUnOpS op,ARMNRS * dst,ARMNRS * src,UInt size,Bool Q)1421 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1422                              UInt size, Bool Q ) {
1423    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1424    i->tag                = ARMin_NUnaryS;
1425    i->ARMin.NUnaryS.op   = op;
1426    i->ARMin.NUnaryS.src  = src;
1427    i->ARMin.NUnaryS.dst  = dst;
1428    i->ARMin.NUnaryS.size = size;
1429    i->ARMin.NUnaryS.Q    = Q;
1430    return i;
1431 }
1432 
ARMInstr_NDual(ARMNeonDualOp op,HReg nQ,HReg mQ,UInt size,Bool Q)1433 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1434                            UInt size, Bool Q ) {
1435    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1436    i->tag                = ARMin_NDual;
1437    i->ARMin.NDual.op   = op;
1438    i->ARMin.NDual.arg1 = nQ;
1439    i->ARMin.NDual.arg2 = mQ;
1440    i->ARMin.NDual.size = size;
1441    i->ARMin.NDual.Q    = Q;
1442    return i;
1443 }
1444 
ARMInstr_NBinary(ARMNeonBinOp op,HReg dst,HReg argL,HReg argR,UInt size,Bool Q)1445 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1446                              HReg dst, HReg argL, HReg argR,
1447                              UInt size, Bool Q ) {
1448    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1449    i->tag                = ARMin_NBinary;
1450    i->ARMin.NBinary.op   = op;
1451    i->ARMin.NBinary.argL = argL;
1452    i->ARMin.NBinary.argR = argR;
1453    i->ARMin.NBinary.dst  = dst;
1454    i->ARMin.NBinary.size = size;
1455    i->ARMin.NBinary.Q    = Q;
1456    return i;
1457 }
1458 
ARMInstr_NeonImm(HReg dst,ARMNImm * imm)1459 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1460    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1461    i->tag         = ARMin_NeonImm;
1462    i->ARMin.NeonImm.dst = dst;
1463    i->ARMin.NeonImm.imm = imm;
1464    return i;
1465 }
1466 
ARMInstr_NCMovQ(ARMCondCode cond,HReg dst,HReg src)1467 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1468    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1469    i->tag               = ARMin_NCMovQ;
1470    i->ARMin.NCMovQ.cond = cond;
1471    i->ARMin.NCMovQ.dst  = dst;
1472    i->ARMin.NCMovQ.src  = src;
1473    vassert(cond != ARMcc_AL);
1474    return i;
1475 }
1476 
ARMInstr_NShift(ARMNeonShiftOp op,HReg dst,HReg argL,HReg argR,UInt size,Bool Q)1477 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1478                             HReg dst, HReg argL, HReg argR,
1479                             UInt size, Bool Q ) {
1480    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1481    i->tag                = ARMin_NShift;
1482    i->ARMin.NShift.op   = op;
1483    i->ARMin.NShift.argL = argL;
1484    i->ARMin.NShift.argR = argR;
1485    i->ARMin.NShift.dst  = dst;
1486    i->ARMin.NShift.size = size;
1487    i->ARMin.NShift.Q    = Q;
1488    return i;
1489 }
1490 
1491 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1492 {
1493    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1494    i->tag              = ARMin_NShl64;
1495    i->ARMin.NShl64.dst = dst;
1496    i->ARMin.NShl64.src = src;
1497    i->ARMin.NShl64.amt = amt;
1498    vassert(amt >= 1 && amt <= 63);
1499    return i;
1500 }
1501 
1502 /* Helper copy-pasted from isel.c */
1503 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1504 {
1505    UInt i;
1506    for (i = 0; i < 16; i++) {
1507       if (0 == (u & 0xFFFFFF00)) {
1508          *u8 = u;
1509          *u4 = i;
1510          return True;
1511       }
1512       u = ROR32(u, 30);
1513    }
1514    vassert(i == 16);
1515    return False;
1516 }
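
/* Illustrative only, not part of the build: the (u8, u4) pair found by
   fitsIn8x4 reassembles the original immediate as u8 rotated right by
   2*u4, which is ARM's 8-bit-immediate-with-even-rotation form.  A
   minimal sanity check (hypothetical helper, assuming the ROR32 macro
   used above): */
#if 0
static void example_fitsIn8x4 ( void )
{
   UInt u8, u4;
   /* 0xFF000000 is encodable: the loop finds u8 == 0xFF, u4 == 4,
      and ROR32(0xFF, 2*4) == 0xFF000000. */
   vassert(fitsIn8x4(&u8, &u4, 0xFF000000));
   vassert(ROR32(u8, 2 * u4) == 0xFF000000);
   /* 0x00000102 would need a rotation by an odd amount to fit in
      8 bits, so it is not encodable and the search returns False. */
   vassert(!fitsIn8x4(&u8, &u4, 0x102));
}
#endif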
1517 
1518 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1519    UInt u8, u4;
1520    ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1521    /* Try to generate a single ADD if possible */
1522    if (fitsIn8x4(&u8, &u4, imm32)) {
1523       i->tag            = ARMin_Alu;
1524       i->ARMin.Alu.op   = ARMalu_ADD;
1525       i->ARMin.Alu.dst  = rD;
1526       i->ARMin.Alu.argL = rN;
1527       i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1528    } else {
1529       i->tag               = ARMin_Add32;
1530       i->ARMin.Add32.rD    = rD;
1531       i->ARMin.Add32.rN    = rN;
1532       i->ARMin.Add32.imm32 = imm32;
1533    }
1534    return i;
1535 }
1536 
1537 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1538                              ARMAMode1* amFailAddr ) {
1539    ARMInstr* i                 = LibVEX_Alloc(sizeof(ARMInstr));
1540    i->tag                      = ARMin_EvCheck;
1541    i->ARMin.EvCheck.amCounter  = amCounter;
1542    i->ARMin.EvCheck.amFailAddr = amFailAddr;
1543    return i;
1544 }
1545 
1546 ARMInstr* ARMInstr_ProfInc ( void ) {
1547    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1548    i->tag      = ARMin_ProfInc;
1549    return i;
1550 }
1551 
1552 /* ... */
1553 
1554 void ppARMInstr ( ARMInstr* i ) {
1555    switch (i->tag) {
1556       case ARMin_Alu:
1557          vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
1558          ppHRegARM(i->ARMin.Alu.dst);
1559          vex_printf(", ");
1560          ppHRegARM(i->ARMin.Alu.argL);
1561          vex_printf(", ");
1562          ppARMRI84(i->ARMin.Alu.argR);
1563          return;
1564       case ARMin_Shift:
1565          vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
1566          ppHRegARM(i->ARMin.Shift.dst);
1567          vex_printf(", ");
1568          ppHRegARM(i->ARMin.Shift.argL);
1569          vex_printf(", ");
1570          ppARMRI5(i->ARMin.Shift.argR);
1571          return;
1572       case ARMin_Unary:
1573          vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
1574          ppHRegARM(i->ARMin.Unary.dst);
1575          vex_printf(", ");
1576          ppHRegARM(i->ARMin.Unary.src);
1577          return;
1578       case ARMin_CmpOrTst:
1579          vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1580          ppHRegARM(i->ARMin.CmpOrTst.argL);
1581          vex_printf(", ");
1582          ppARMRI84(i->ARMin.CmpOrTst.argR);
1583          return;
1584       case ARMin_Mov:
1585          vex_printf("mov   ");
1586          ppHRegARM(i->ARMin.Mov.dst);
1587          vex_printf(", ");
1588          ppARMRI84(i->ARMin.Mov.src);
1589          return;
1590       case ARMin_Imm32:
1591          vex_printf("imm   ");
1592          ppHRegARM(i->ARMin.Imm32.dst);
1593          vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1594          return;
1595       case ARMin_LdSt32:
1596          if (i->ARMin.LdSt32.isLoad) {
1597             vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1598                                     : showARMCondCode(i->ARMin.LdSt32.cc));
1599             ppHRegARM(i->ARMin.LdSt32.rD);
1600             vex_printf(", ");
1601             ppARMAMode1(i->ARMin.LdSt32.amode);
1602          } else {
1603             vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1604                                     : showARMCondCode(i->ARMin.LdSt32.cc));
1605             ppARMAMode1(i->ARMin.LdSt32.amode);
1606             vex_printf(", ");
1607             ppHRegARM(i->ARMin.LdSt32.rD);
1608          }
1609          return;
1610       case ARMin_LdSt16:
1611          if (i->ARMin.LdSt16.isLoad) {
1612             vex_printf("%s%s%s",
1613                        i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1614                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1615                           : showARMCondCode(i->ARMin.LdSt16.cc),
1616                        i->ARMin.LdSt16.signedLoad ? " " : "  ");
1617             ppHRegARM(i->ARMin.LdSt16.rD);
1618             vex_printf(", ");
1619             ppARMAMode2(i->ARMin.LdSt16.amode);
1620          } else {
1621             vex_printf("strh%s  ",
1622                        i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1623                           : showARMCondCode(i->ARMin.LdSt16.cc));
1624             ppARMAMode2(i->ARMin.LdSt16.amode);
1625             vex_printf(", ");
1626             ppHRegARM(i->ARMin.LdSt16.rD);
1627          }
1628          return;
1629       case ARMin_LdSt8U:
1630          if (i->ARMin.LdSt8U.isLoad) {
1631             vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1632                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
1633             ppHRegARM(i->ARMin.LdSt8U.rD);
1634             vex_printf(", ");
1635             ppARMAMode1(i->ARMin.LdSt8U.amode);
1636          } else {
1637             vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1638                                       : showARMCondCode(i->ARMin.LdSt8U.cc));
1639             ppARMAMode1(i->ARMin.LdSt8U.amode);
1640             vex_printf(", ");
1641             ppHRegARM(i->ARMin.LdSt8U.rD);
1642          }
1643          return;
1644       case ARMin_Ld8S:
1645          vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
1646                                    : showARMCondCode(i->ARMin.Ld8S.cc));
1647          ppARMAMode2(i->ARMin.Ld8S.amode);
1648          vex_printf(", ");
1649          ppHRegARM(i->ARMin.Ld8S.rD);
1650          return;
1651       case ARMin_XDirect:
1652          vex_printf("(xDirect) ");
1653          vex_printf("if (%%cpsr.%s) { ",
1654                     showARMCondCode(i->ARMin.XDirect.cond));
1655          vex_printf("movw r12,0x%x; ",
1656                     (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1657          vex_printf("movt r12,0x%x; ",
1658                     (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1659          vex_printf("str r12,");
1660          ppARMAMode1(i->ARMin.XDirect.amR15T);
1661          vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1662                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1663          vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1664                     i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1665          vex_printf("blx r12 }");
1666          return;
1667       case ARMin_XIndir:
1668          vex_printf("(xIndir) ");
1669          vex_printf("if (%%cpsr.%s) { ",
1670                     showARMCondCode(i->ARMin.XIndir.cond));
1671          vex_printf("str ");
1672          ppHRegARM(i->ARMin.XIndir.dstGA);
1673          vex_printf(",");
1674          ppARMAMode1(i->ARMin.XIndir.amR15T);
1675          vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1676          vex_printf("movt r12,HI16($disp_cp_xindir); ");
1677          vex_printf("blx r12 }");
1678          return;
1679       case ARMin_XAssisted:
1680          vex_printf("(xAssisted) ");
1681          vex_printf("if (%%cpsr.%s) { ",
1682                     showARMCondCode(i->ARMin.XAssisted.cond));
1683          vex_printf("str ");
1684          ppHRegARM(i->ARMin.XAssisted.dstGA);
1685          vex_printf(",");
1686          ppARMAMode1(i->ARMin.XAssisted.amR15T);
1687          vex_printf("; movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1688                     (Int)i->ARMin.XAssisted.jk);
1689          vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1690          vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1691          vex_printf("blx r12 }");
1692          return;
1693       case ARMin_CMov:
1694          vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1695          ppHRegARM(i->ARMin.CMov.dst);
1696          vex_printf(", ");
1697          ppARMRI84(i->ARMin.CMov.src);
1698          return;
1699       case ARMin_Call:
1700          vex_printf("call%s  ",
1701                     i->ARMin.Call.cond==ARMcc_AL
1702                        ? "" : showARMCondCode(i->ARMin.Call.cond));
1703          vex_printf("0x%lx [nArgRegs=%d, ",
1704                     i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1705          ppRetLoc(i->ARMin.Call.rloc);
1706          vex_printf("]");
1707          return;
1708       case ARMin_Mul:
1709          vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1710          if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1711             vex_printf("r0, r2, r3");
1712          } else {
1713             vex_printf("r1:r0, r2, r3");
1714          }
1715          return;
1716       case ARMin_LdrEX: {
1717          const HChar* sz = "";
1718          switch (i->ARMin.LdrEX.szB) {
1719             case 1: sz = "b"; break; case 2: sz = "h"; break;
1720             case 8: sz = "d"; break; case 4: break;
1721             default: vassert(0);
1722          }
1723          vex_printf("ldrex%s %sr2, [r4]",
1724                     sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1725          return;
1726       }
1727       case ARMin_StrEX: {
1728          const HChar* sz = "";
1729          switch (i->ARMin.StrEX.szB) {
1730             case 1: sz = "b"; break; case 2: sz = "h"; break;
1731             case 8: sz = "d"; break; case 4: break;
1732             default: vassert(0);
1733          }
1734          vex_printf("strex%s r0, %sr2, [r4]",
1735                     sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1736          return;
1737       }
1738       case ARMin_VLdStD:
1739          if (i->ARMin.VLdStD.isLoad) {
1740             vex_printf("fldd  ");
1741             ppHRegARM(i->ARMin.VLdStD.dD);
1742             vex_printf(", ");
1743             ppARMAModeV(i->ARMin.VLdStD.amode);
1744          } else {
1745             vex_printf("fstd  ");
1746             ppARMAModeV(i->ARMin.VLdStD.amode);
1747             vex_printf(", ");
1748             ppHRegARM(i->ARMin.VLdStD.dD);
1749          }
1750          return;
1751       case ARMin_VLdStS:
1752          if (i->ARMin.VLdStS.isLoad) {
1753             vex_printf("flds  ");
1754             ppHRegARM(i->ARMin.VLdStS.fD);
1755             vex_printf(", ");
1756             ppARMAModeV(i->ARMin.VLdStS.amode);
1757          } else {
1758             vex_printf("fsts  ");
1759             ppARMAModeV(i->ARMin.VLdStS.amode);
1760             vex_printf(", ");
1761             ppHRegARM(i->ARMin.VLdStS.fD);
1762          }
1763          return;
1764       case ARMin_VAluD:
1765          vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1766          ppHRegARM(i->ARMin.VAluD.dst);
1767          vex_printf(", ");
1768          ppHRegARM(i->ARMin.VAluD.argL);
1769          vex_printf(", ");
1770          ppHRegARM(i->ARMin.VAluD.argR);
1771          return;
1772       case ARMin_VAluS:
1773          vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1774          ppHRegARM(i->ARMin.VAluS.dst);
1775          vex_printf(", ");
1776          ppHRegARM(i->ARMin.VAluS.argL);
1777          vex_printf(", ");
1778          ppHRegARM(i->ARMin.VAluS.argR);
1779          return;
1780       case ARMin_VUnaryD:
1781          vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1782          ppHRegARM(i->ARMin.VUnaryD.dst);
1783          vex_printf(", ");
1784          ppHRegARM(i->ARMin.VUnaryD.src);
1785          return;
1786       case ARMin_VUnaryS:
1787          vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1788          ppHRegARM(i->ARMin.VUnaryS.dst);
1789          vex_printf(", ");
1790          ppHRegARM(i->ARMin.VUnaryS.src);
1791          return;
1792       case ARMin_VCmpD:
1793          vex_printf("fcmpd ");
1794          ppHRegARM(i->ARMin.VCmpD.argL);
1795          vex_printf(", ");
1796          ppHRegARM(i->ARMin.VCmpD.argR);
1797          vex_printf(" ; fmstat");
1798          return;
1799       case ARMin_VCMovD:
1800          vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1801          ppHRegARM(i->ARMin.VCMovD.dst);
1802          vex_printf(", ");
1803          ppHRegARM(i->ARMin.VCMovD.src);
1804          return;
1805       case ARMin_VCMovS:
1806          vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1807          ppHRegARM(i->ARMin.VCMovS.dst);
1808          vex_printf(", ");
1809          ppHRegARM(i->ARMin.VCMovS.src);
1810          return;
1811       case ARMin_VCvtSD:
1812          vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1813          ppHRegARM(i->ARMin.VCvtSD.dst);
1814          vex_printf(", ");
1815          ppHRegARM(i->ARMin.VCvtSD.src);
1816          return;
1817       case ARMin_VXferD:
1818          vex_printf("vmov  ");
1819          if (i->ARMin.VXferD.toD) {
1820             ppHRegARM(i->ARMin.VXferD.dD);
1821             vex_printf(", ");
1822             ppHRegARM(i->ARMin.VXferD.rLo);
1823             vex_printf(", ");
1824             ppHRegARM(i->ARMin.VXferD.rHi);
1825          } else {
1826             ppHRegARM(i->ARMin.VXferD.rLo);
1827             vex_printf(", ");
1828             ppHRegARM(i->ARMin.VXferD.rHi);
1829             vex_printf(", ");
1830             ppHRegARM(i->ARMin.VXferD.dD);
1831          }
1832          return;
1833       case ARMin_VXferS:
1834          vex_printf("vmov  ");
1835          if (i->ARMin.VXferS.toS) {
1836             ppHRegARM(i->ARMin.VXferS.fD);
1837             vex_printf(", ");
1838             ppHRegARM(i->ARMin.VXferS.rLo);
1839          } else {
1840             ppHRegARM(i->ARMin.VXferS.rLo);
1841             vex_printf(", ");
1842             ppHRegARM(i->ARMin.VXferS.fD);
1843          }
1844          return;
1845       case ARMin_VCvtID: {
1846          const HChar* nm = "?";
1847          if (i->ARMin.VCvtID.iToD) {
1848             nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1849          } else {
1850             nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1851          }
1852          vex_printf("%s ", nm);
1853          ppHRegARM(i->ARMin.VCvtID.dst);
1854          vex_printf(", ");
1855          ppHRegARM(i->ARMin.VCvtID.src);
1856          return;
1857       }
1858       case ARMin_FPSCR:
1859          if (i->ARMin.FPSCR.toFPSCR) {
1860             vex_printf("fmxr  fpscr, ");
1861             ppHRegARM(i->ARMin.FPSCR.iReg);
1862          } else {
1863             vex_printf("fmrx  ");
1864             ppHRegARM(i->ARMin.FPSCR.iReg);
1865             vex_printf(", fpscr");
1866          }
1867          return;
1868       case ARMin_MFence:
1869          vex_printf("(mfence) dsb sy; dmb sy; isb");
1870          return;
1871       case ARMin_CLREX:
1872          vex_printf("clrex");
1873          return;
1874       case ARMin_NLdStQ:
1875          if (i->ARMin.NLdStQ.isLoad)
1876             vex_printf("vld1.32 {");
1877          else
1878             vex_printf("vst1.32 {");
1879          ppHRegARM(i->ARMin.NLdStQ.dQ);
1880          vex_printf("} ");
1881          ppARMAModeN(i->ARMin.NLdStQ.amode);
1882          return;
1883       case ARMin_NLdStD:
1884          if (i->ARMin.NLdStD.isLoad)
1885             vex_printf("vld1.32 {");
1886          else
1887             vex_printf("vst1.32 {");
1888          ppHRegARM(i->ARMin.NLdStD.dD);
1889          vex_printf("} ");
1890          ppARMAModeN(i->ARMin.NLdStD.amode);
1891          return;
1892       case ARMin_NUnary:
1893          vex_printf("%s%s%s  ",
1894                     showARMNeonUnOp(i->ARMin.NUnary.op),
1895                     showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1896                     showARMNeonDataSize(i));
1897          ppHRegARM(i->ARMin.NUnary.dst);
1898          vex_printf(", ");
1899          ppHRegARM(i->ARMin.NUnary.src);
1900          if (i->ARMin.NUnary.op == ARMneon_EQZ)
1901             vex_printf(", #0");
1902          if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1903              i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1904              i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1905              i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1906             vex_printf(", #%d", i->ARMin.NUnary.size);
1907          }
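         /* For the VQSHLN ops below, the size field packs both the
            lane width and the shift amount: the highest marker bit
            set (0x40, 0x20, 0x10 or 0x08) selects 64-, 32-, 16- or
            8-bit lanes, and subtracting it off leaves the immediate
            shift that gets printed. */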
1908          if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1909              i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1910              i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1911             UInt size;
1912             size = i->ARMin.NUnary.size;
1913             if (size & 0x40) {
1914                vex_printf(", #%d", size - 64);
1915             } else if (size & 0x20) {
1916                vex_printf(", #%d", size - 32);
1917             } else if (size & 0x10) {
1918                vex_printf(", #%d", size - 16);
1919             } else if (size & 0x08) {
1920                vex_printf(", #%d", size - 8);
1921             }
1922          }
1923          return;
1924       case ARMin_NUnaryS:
1925          vex_printf("%s%s%s  ",
1926                     showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1927                     showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1928                     showARMNeonDataSize(i));
1929          ppARMNRS(i->ARMin.NUnaryS.dst);
1930          vex_printf(", ");
1931          ppARMNRS(i->ARMin.NUnaryS.src);
1932          return;
1933       case ARMin_NShift:
1934          vex_printf("%s%s%s  ",
1935                     showARMNeonShiftOp(i->ARMin.NShift.op),
1936                     showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1937                     showARMNeonDataSize(i));
1938          ppHRegARM(i->ARMin.NShift.dst);
1939          vex_printf(", ");
1940          ppHRegARM(i->ARMin.NShift.argL);
1941          vex_printf(", ");
1942          ppHRegARM(i->ARMin.NShift.argR);
1943          return;
1944       case ARMin_NShl64:
1945          vex_printf("vshl.i64 ");
1946          ppHRegARM(i->ARMin.NShl64.dst);
1947          vex_printf(", ");
1948          ppHRegARM(i->ARMin.NShl64.src);
1949          vex_printf(", #%u", i->ARMin.NShl64.amt);
1950          return;
1951       case ARMin_NDual:
1952          vex_printf("%s%s%s  ",
1953                     showARMNeonDualOp(i->ARMin.NDual.op),
1954                     showARMNeonDualOpDataType(i->ARMin.NDual.op),
1955                     showARMNeonDataSize(i));
1956          ppHRegARM(i->ARMin.NDual.arg1);
1957          vex_printf(", ");
1958          ppHRegARM(i->ARMin.NDual.arg2);
1959          return;
1960       case ARMin_NBinary:
1961          vex_printf("%s%s%s",
1962                     showARMNeonBinOp(i->ARMin.NBinary.op),
1963                     showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1964                     showARMNeonDataSize(i));
1965          vex_printf("  ");
1966          ppHRegARM(i->ARMin.NBinary.dst);
1967          vex_printf(", ");
1968          ppHRegARM(i->ARMin.NBinary.argL);
1969          vex_printf(", ");
1970          ppHRegARM(i->ARMin.NBinary.argR);
1971          return;
1972       case ARMin_NeonImm:
1973          vex_printf("vmov  ");
1974          ppHRegARM(i->ARMin.NeonImm.dst);
1975          vex_printf(", ");
1976          ppARMNImm(i->ARMin.NeonImm.imm);
1977          return;
1978       case ARMin_NCMovQ:
1979          vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1980          ppHRegARM(i->ARMin.NCMovQ.dst);
1981          vex_printf(", ");
1982          ppHRegARM(i->ARMin.NCMovQ.src);
1983          return;
1984       case ARMin_Add32:
1985          vex_printf("add32 ");
1986          ppHRegARM(i->ARMin.Add32.rD);
1987          vex_printf(", ");
1988          ppHRegARM(i->ARMin.Add32.rN);
1989          vex_printf(", ");
1990          vex_printf("%d", i->ARMin.Add32.imm32);
1991          return;
1992       case ARMin_EvCheck:
1993          vex_printf("(evCheck) ldr r12,");
1994          ppARMAMode1(i->ARMin.EvCheck.amCounter);
1995          vex_printf("; subs r12,r12,$1; str r12,");
1996          ppARMAMode1(i->ARMin.EvCheck.amCounter);
1997          vex_printf("; bpl nofail; ldr r12,");
1998          ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
1999          vex_printf("; bx r12; nofail:");
2000          return;
2001       case ARMin_ProfInc:
2002          vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2003                     "movt r12,HI16($NotKnownYet); "
2004                     "ldr r11,[r12]; "
2005                     "adds r11,r11,$1; "
2006                     "str r11,[r12]; "
2007                     "ldr r11,[r12+4]; "
2008                     "adc r11,r11,$0; "
2009                     "str r11,[r12+4]");
2010          return;
2011       default:
2012          vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2013          vpanic("ppARMInstr(1)");
2014          return;
2015    }
2016 }
2017 
2018 
2019 /* --------- Helpers for register allocation. --------- */
2020 
2021 void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
2022 {
2023    vassert(mode64 == False);
2024    initHRegUsage(u);
2025    switch (i->tag) {
2026       case ARMin_Alu:
2027          addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2028          addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2029          addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2030          return;
2031       case ARMin_Shift:
2032          addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2033          addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2034          addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2035          return;
2036       case ARMin_Unary:
2037          addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2038          addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2039          return;
2040       case ARMin_CmpOrTst:
2041          addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2042          addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2043          return;
2044       case ARMin_Mov:
2045          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2046          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2047          return;
2048       case ARMin_Imm32:
2049          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2050          return;
2051       case ARMin_LdSt32:
2052          addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2053          if (i->ARMin.LdSt32.isLoad) {
2054             addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2055             if (i->ARMin.LdSt32.cc != ARMcc_AL)
2056                addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2057          } else {
2058             addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2059          }
2060          return;
2061       case ARMin_LdSt16:
2062          addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2063          if (i->ARMin.LdSt16.isLoad) {
2064             addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2065             if (i->ARMin.LdSt16.cc != ARMcc_AL)
2066                addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2067          } else {
2068             addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2069          }
2070          return;
2071       case ARMin_LdSt8U:
2072          addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2073          if (i->ARMin.LdSt8U.isLoad) {
2074             addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2075             if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2076                addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2077          } else {
2078             addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2079          }
2080          return;
2081       case ARMin_Ld8S:
2082          addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2083          addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2084          if (i->ARMin.Ld8S.cc != ARMcc_AL)
2085             addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2086          return;
2087       /* XDirect/XIndir/XAssisted are also a bit subtle.  They
2088          conditionally exit the block.  Hence we only need to list (1)
2089          the registers that they read, and (2) the registers that they
2090          write in the case where the block is not exited.  (2) is
2091          empty, hence only (1) is relevant here. */
2092       case ARMin_XDirect:
2093          addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2094          return;
2095       case ARMin_XIndir:
2096          addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2097          addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2098          return;
2099       case ARMin_XAssisted:
2100          addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2101          addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2102          return;
2103       case ARMin_CMov:
2104          addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2105          addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
2106          addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2107          return;
2108       case ARMin_Call:
2109          /* logic and comments copied/modified from x86 back end */
2110          /* This is a bit subtle. */
2111          /* First off, claim it trashes all the caller-saved regs
2112             which fall within the register allocator's jurisdiction.
2113             These I believe to be r0,1,2,3.  If it turns out that r9
2114             is also caller-saved, then we'll have to add that here
2115             too. */
2116          addHRegUse(u, HRmWrite, hregARM_R0());
2117          addHRegUse(u, HRmWrite, hregARM_R1());
2118          addHRegUse(u, HRmWrite, hregARM_R2());
2119          addHRegUse(u, HRmWrite, hregARM_R3());
2120          /* Now we have to state any parameter-carrying registers
2121             which might be read.  This depends on nArgRegs. */
2122          switch (i->ARMin.Call.nArgRegs) {
2123             case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2124             case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2125             case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2126             case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2127             case 0: break;
2128             default: vpanic("getRegUsage_ARM:Call:regparms");
2129          }
2130          /* Finally, there is the issue that the insn trashes a
2131             register because the literal target address has to be
2132             loaded into a register.  Fortunately, for the nArgRegs=
2133             0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2134             this does not cause any further damage.  For the
2135             nArgRegs=4 case, we'll have to choose another register
2136             arbitrarily since all the caller saved regs are used for
2137             parameters, and so we might as well choose r11.
2138             */
2139          if (i->ARMin.Call.nArgRegs == 4)
2140             addHRegUse(u, HRmWrite, hregARM_R11());
2141          /* Upshot of this is that the assembler really must observe
2142             the here-stated convention of which register to use as an
2143             address temporary, depending on nArgRegs: 0==r0,
2144             1==r1, 2==r2, 3==r3, 4==r11 */
2145          return;
2146       case ARMin_Mul:
2147          addHRegUse(u, HRmRead, hregARM_R2());
2148          addHRegUse(u, HRmRead, hregARM_R3());
2149          addHRegUse(u, HRmWrite, hregARM_R0());
2150          if (i->ARMin.Mul.op != ARMmul_PLAIN)
2151             addHRegUse(u, HRmWrite, hregARM_R1());
2152          return;
2153       case ARMin_LdrEX:
2154          addHRegUse(u, HRmRead, hregARM_R4());
2155          addHRegUse(u, HRmWrite, hregARM_R2());
2156          if (i->ARMin.LdrEX.szB == 8)
2157             addHRegUse(u, HRmWrite, hregARM_R3());
2158          return;
2159       case ARMin_StrEX:
2160          addHRegUse(u, HRmRead, hregARM_R4());
2161          addHRegUse(u, HRmWrite, hregARM_R0());
2162          addHRegUse(u, HRmRead, hregARM_R2());
2163          if (i->ARMin.StrEX.szB == 8)
2164             addHRegUse(u, HRmRead, hregARM_R3());
2165          return;
2166       case ARMin_VLdStD:
2167          addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2168          if (i->ARMin.VLdStD.isLoad) {
2169             addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2170          } else {
2171             addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2172          }
2173          return;
2174       case ARMin_VLdStS:
2175          addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2176          if (i->ARMin.VLdStS.isLoad) {
2177             addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2178          } else {
2179             addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2180          }
2181          return;
2182       case ARMin_VAluD:
2183          addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2184          addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2185          addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2186          return;
2187       case ARMin_VAluS:
2188          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2189          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2190          addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2191          return;
2192       case ARMin_VUnaryD:
2193          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2194          addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2195          return;
2196       case ARMin_VUnaryS:
2197          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2198          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2199          return;
2200       case ARMin_VCmpD:
2201          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2202          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2203          return;
2204       case ARMin_VCMovD:
2205          addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2206          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
2207          addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
2208          return;
2209       case ARMin_VCMovS:
2210          addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2211          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
2212          addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
2213          return;
2214       case ARMin_VCvtSD:
2215          addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2216          addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
2217          return;
2218       case ARMin_VXferD:
2219          if (i->ARMin.VXferD.toD) {
2220             addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2221             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
2222             addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
2223          } else {
2224             addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
2225             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2226             addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2227          }
2228          return;
2229       case ARMin_VXferS:
2230          if (i->ARMin.VXferS.toS) {
2231             addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2232             addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
2233          } else {
2234             addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
2235             addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2236          }
2237          return;
2238       case ARMin_VCvtID:
2239          addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2240          addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
2241          return;
2242       case ARMin_FPSCR:
2243          if (i->ARMin.FPSCR.toFPSCR)
2244             addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2245          else
2246             addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2247          return;
2248       case ARMin_MFence:
2249          return;
2250       case ARMin_CLREX:
2251          return;
2252       case ARMin_NLdStQ:
2253          if (i->ARMin.NLdStQ.isLoad)
2254             addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2255          else
2256             addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2257          addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2258          return;
2259       case ARMin_NLdStD:
2260          if (i->ARMin.NLdStD.isLoad)
2261             addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2262          else
2263             addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2264          addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2265          return;
2266       case ARMin_NUnary:
2267          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2268          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2269          return;
2270       case ARMin_NUnaryS:
2271          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2272          addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2273          return;
2274       case ARMin_NShift:
2275          addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2276          addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2277          addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2278          return;
2279       case ARMin_NShl64:
2280          addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2281          addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2282          return;
2283       case ARMin_NDual:
2284          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2285          addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2286          addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2287          addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2288          return;
2289       case ARMin_NBinary:
2290          addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2291          /* TODO: sometimes dst is also being read! */
2292          // XXX fix this
2293          addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2294          addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2295          return;
2296       case ARMin_NeonImm:
2297          addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2298          return;
2299       case ARMin_NCMovQ:
2300          addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2301          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
2302          addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
2303          return;
2304       case ARMin_Add32:
2305          addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2306          addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2307          return;
2308       case ARMin_EvCheck:
2309          /* We expect both amodes only to mention r8, so this is in
2310             fact pointless, since r8 isn't allocatable, but
2311             anyway.. */
2312          addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2313          addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2314          addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2315          return;
2316       case ARMin_ProfInc:
2317          addHRegUse(u, HRmWrite, hregARM_R12());
2318          addHRegUse(u, HRmWrite, hregARM_R11());
2319          return;
2320       default:
2321          ppARMInstr(i);
2322          vpanic("getRegUsage_ARMInstr");
2323    }
2324 }
2325 
2326 
2327 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2328 {
2329    vassert(mode64 == False);
2330    switch (i->tag) {
2331       case ARMin_Alu:
2332          i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2333          i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2334          mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2335          return;
2336       case ARMin_Shift:
2337          i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2338          i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2339          mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2340          return;
2341       case ARMin_Unary:
2342          i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2343          i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2344          return;
2345       case ARMin_CmpOrTst:
2346          i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2347          mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2348          return;
2349       case ARMin_Mov:
2350          i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2351          mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2352          return;
2353       case ARMin_Imm32:
2354          i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2355          return;
2356       case ARMin_LdSt32:
2357          i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2358          mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2359          return;
2360       case ARMin_LdSt16:
2361          i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2362          mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2363          return;
2364       case ARMin_LdSt8U:
2365          i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2366          mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2367          return;
2368       case ARMin_Ld8S:
2369          i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2370          mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2371          return;
2372       case ARMin_XDirect:
2373          mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2374          return;
2375       case ARMin_XIndir:
2376          i->ARMin.XIndir.dstGA
2377             = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2378          mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2379          return;
2380       case ARMin_XAssisted:
2381          i->ARMin.XAssisted.dstGA
2382             = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2383          mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2384          return;
2385       case ARMin_CMov:
2386          i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2387          mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2388          return;
2389       case ARMin_Call:
2390          return;
2391       case ARMin_Mul:
2392          return;
2393       case ARMin_LdrEX:
2394          return;
2395       case ARMin_StrEX:
2396          return;
2397       case ARMin_VLdStD:
2398          i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2399          mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2400          return;
2401       case ARMin_VLdStS:
2402          i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2403          mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2404          return;
2405       case ARMin_VAluD:
2406          i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2407          i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2408          i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2409          return;
2410       case ARMin_VAluS:
2411          i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2412          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2413          i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2414          return;
2415       case ARMin_VUnaryD:
2416          i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2417          i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2418          return;
2419       case ARMin_VUnaryS:
2420          i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2421          i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2422          return;
2423       case ARMin_VCmpD:
2424          i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2425          i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2426          return;
2427       case ARMin_VCMovD:
2428          i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2429          i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2430          return;
2431       case ARMin_VCMovS:
2432          i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2433          i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2434          return;
2435       case ARMin_VCvtSD:
2436          i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2437          i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2438          return;
2439       case ARMin_VXferD:
2440          i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2441          i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2442          i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2443          return;
2444       case ARMin_VXferS:
2445          i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2446          i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2447          return;
2448       case ARMin_VCvtID:
2449          i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2450          i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2451          return;
2452       case ARMin_FPSCR:
2453          i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2454          return;
2455       case ARMin_MFence:
2456          return;
2457       case ARMin_CLREX:
2458          return;
2459       case ARMin_NLdStQ:
2460          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2461          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2462          return;
2463       case ARMin_NLdStD:
2464          i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2465          mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2466          return;
2467       case ARMin_NUnary:
2468          i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2469          i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2470          return;
2471       case ARMin_NUnaryS:
2472          i->ARMin.NUnaryS.src->reg
2473             = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2474          i->ARMin.NUnaryS.dst->reg
2475             = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2476          return;
2477       case ARMin_NShift:
2478          i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2479          i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2480          i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2481          return;
2482       case ARMin_NShl64:
2483          i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2484          i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2485          return;
2486       case ARMin_NDual:
2487          i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2488          i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2489          return;
2490       case ARMin_NBinary:
2491          i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2492          i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2493          i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2494          return;
2495       case ARMin_NeonImm:
2496          i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2497          return;
2498       case ARMin_NCMovQ:
2499          i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2500          i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2501          return;
2502       case ARMin_Add32:
2503          i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2504          i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2505          return;
2506       case ARMin_EvCheck:
2507          /* We expect both amodes only to mention r8, so this is in
2508             fact pointless, since r8 isn't allocatable, but
2509             anyway.. */
2510          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2511          mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2512          return;
2513       case ARMin_ProfInc:
2514          /* hardwires r11 and r12 -- nothing to modify. */
2515          return;
2516       default:
2517          ppARMInstr(i);
2518          vpanic("mapRegs_ARMInstr");
2519    }
2520 }
2521 
2522 /* Figure out if i represents a reg-reg move, and if so assign the
2523    source and destination to *src and *dst.  If in doubt say No.  Used
2524    by the register allocator to do move coalescing.
2525 */
2526 Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
2527 {
2528    /* Moves between integer regs */
2529    switch (i->tag) {
2530       case ARMin_Mov:
2531          if (i->ARMin.Mov.src->tag == ARMri84_R) {
2532             *src = i->ARMin.Mov.src->ARMri84.R.reg;
2533             *dst = i->ARMin.Mov.dst;
2534             return True;
2535          }
2536          break;
2537       case ARMin_VUnaryD:
2538          if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2539             *src = i->ARMin.VUnaryD.src;
2540             *dst = i->ARMin.VUnaryD.dst;
2541             return True;
2542          }
2543          break;
2544       case ARMin_VUnaryS:
2545          if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2546             *src = i->ARMin.VUnaryS.src;
2547             *dst = i->ARMin.VUnaryS.dst;
2548             return True;
2549          }
2550          break;
2551       case ARMin_NUnary:
2552          if (i->ARMin.NUnary.op == ARMneon_COPY) {
2553             *src = i->ARMin.NUnary.src;
2554             *dst = i->ARMin.NUnary.dst;
2555             return True;
2556          }
2557          break;
2558       default:
2559          break;
2560    }
2561 
2562    return False;
2563 }
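
/* For example, "mov r5, r3" (an ARMri84_R source) is reported as a
   coalescable copy, whereas "mov r5, #4" (an ARMri84_I84 immediate)
   is not a reg-reg move and falls through to False. */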
2564 
2565 
2566 /* Generate arm spill/reload instructions under the direction of the
2567    register allocator.  Note it's critical these don't write the
2568    condition codes. */
2569 
2570 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2571                     HReg rreg, Int offsetB, Bool mode64 )
2572 {
2573    HRegClass rclass;
2574    vassert(offsetB >= 0);
2575    vassert(!hregIsVirtual(rreg));
2576    vassert(mode64 == False);
2577    *i1 = *i2 = NULL;
2578    rclass = hregClass(rreg);
2579    switch (rclass) {
2580       case HRcInt32:
2581          vassert(offsetB <= 4095);
2582          *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2583                                 rreg,
2584                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
2585          return;
2586       case HRcFlt32:
2587       case HRcFlt64: {
2588          HReg r8   = hregARM_R8();  /* baseblock */
2589          HReg r12  = hregARM_R12(); /* spill temp */
2590          HReg base = r8;
2591          vassert(0 == (offsetB & 3));
2592          if (offsetB >= 1024) {
2593             Int offsetKB = offsetB / 1024;
2594             /* r12 = r8 + (1024 * offsetKB) */
2595             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2596                                ARMRI84_I84(offsetKB, 11));
2597             offsetB -= (1024 * offsetKB);
2598             base = r12;
2599          }
2600          vassert(offsetB <= 1020);
2601          if (rclass == HRcFlt32) {
2602             *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2603                                    rreg,
2604                                    mkARMAModeV(base, offsetB) );
2605          } else {
2606             *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2607                                    rreg,
2608                                    mkARMAModeV(base, offsetB) );
2609          }
2610          return;
2611       }
2612       case HRcVec128: {
2613          HReg r8  = hregARM_R8();
2614          HReg r12 = hregARM_R12();
2615          *i1 = ARMInstr_Add32(r12, r8, offsetB);
2616          *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2617          return;
2618       }
2619       default:
2620          ppHRegClass(rclass);
2621          vpanic("genSpill_ARM: unimplemented regclass");
2622    }
2623 }
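
/* Worked example, for illustration: spilling a D register at
   offsetB == 2052.  Since 2052 >= 1024, offsetKB == 2 and i1 becomes
   "add r12, r8, #2048" (ARMRI84_I84(2, 11) encodes 2 rotated right
   by 22, i.e. 2 << 10 == 2048), leaving offsetB == 4, well within
   the 1020-byte reach of fstd; i2 is then "fstd dN, [r12, #4]".
   genReload_ARM below mirrors this with loads. */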
2624 
2625 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2626                      HReg rreg, Int offsetB, Bool mode64 )
2627 {
2628    HRegClass rclass;
2629    vassert(offsetB >= 0);
2630    vassert(!hregIsVirtual(rreg));
2631    vassert(mode64 == False);
2632    *i1 = *i2 = NULL;
2633    rclass = hregClass(rreg);
2634    switch (rclass) {
2635       case HRcInt32:
2636          vassert(offsetB <= 4095);
2637          *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2638                                 rreg,
2639                                 ARMAMode1_RI(hregARM_R8(), offsetB) );
2640          return;
2641       case HRcFlt32:
2642       case HRcFlt64: {
2643          HReg r8   = hregARM_R8();  /* baseblock */
2644          HReg r12  = hregARM_R12(); /* spill temp */
2645          HReg base = r8;
2646          vassert(0 == (offsetB & 3));
2647          if (offsetB >= 1024) {
2648             Int offsetKB = offsetB / 1024;
2649             /* r12 = r8 + (1024 * offsetKB) */
2650             *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2651                                ARMRI84_I84(offsetKB, 11));
2652             offsetB -= (1024 * offsetKB);
2653             base = r12;
2654          }
2655          vassert(offsetB <= 1020);
2656          if (rclass == HRcFlt32) {
2657             *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2658                                    rreg,
2659                                    mkARMAModeV(base, offsetB) );
2660          } else {
2661             *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2662                                    rreg,
2663                                    mkARMAModeV(base, offsetB) );
2664          }
2665          return;
2666       }
2667       case HRcVec128: {
2668          HReg r8  = hregARM_R8();
2669          HReg r12 = hregARM_R12();
2670          *i1 = ARMInstr_Add32(r12, r8, offsetB);
2671          *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2672          return;
2673       }
2674       default:
2675          ppHRegClass(rclass);
2676          vpanic("genReload_ARM: unimplemented regclass");
2677    }
2678 }
2679 
2680 
2681 /* Emit an instruction into buf and return the number of bytes used.
2682    Note that buf is not the insn's final place, and therefore it is
2683    imperative to emit position-independent code. */
2684 
2685 static inline UChar iregNo ( HReg r )
2686 {
2687    UInt n;
2688    vassert(hregClass(r) == HRcInt32);
2689    vassert(!hregIsVirtual(r));
2690    n = hregNumber(r);
2691    vassert(n <= 15);
2692    return toUChar(n);
2693 }
2694 
2695 static inline UChar dregNo ( HReg r )
2696 {
2697    UInt n;
2698    if (hregClass(r) != HRcFlt64)
2699       ppHRegClass(hregClass(r));
2700    vassert(hregClass(r) == HRcFlt64);
2701    vassert(!hregIsVirtual(r));
2702    n = hregNumber(r);
2703    vassert(n <= 31);
2704    return toUChar(n);
2705 }
2706 
2707 static inline UChar fregNo ( HReg r )
2708 {
2709    UInt n;
2710    vassert(hregClass(r) == HRcFlt32);
2711    vassert(!hregIsVirtual(r));
2712    n = hregNumber(r);
2713    vassert(n <= 31);
2714    return toUChar(n);
2715 }
2716 
2717 static inline UChar qregNo ( HReg r )
2718 {
2719    UInt n;
2720    vassert(hregClass(r) == HRcVec128);
2721    vassert(!hregIsVirtual(r));
2722    n = hregNumber(r);
2723    vassert(n <= 15);
2724    return toUChar(n);
2725 }
2726 
2727 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2728    (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2729 #define X0000  BITS4(0,0,0,0)
2730 #define X0001  BITS4(0,0,0,1)
2731 #define X0010  BITS4(0,0,1,0)
2732 #define X0011  BITS4(0,0,1,1)
2733 #define X0100  BITS4(0,1,0,0)
2734 #define X0101  BITS4(0,1,0,1)
2735 #define X0110  BITS4(0,1,1,0)
2736 #define X0111  BITS4(0,1,1,1)
2737 #define X1000  BITS4(1,0,0,0)
2738 #define X1001  BITS4(1,0,0,1)
2739 #define X1010  BITS4(1,0,1,0)
2740 #define X1011  BITS4(1,0,1,1)
2741 #define X1100  BITS4(1,1,0,0)
2742 #define X1101  BITS4(1,1,0,1)
2743 #define X1110  BITS4(1,1,1,0)
2744 #define X1111  BITS4(1,1,1,1)
2745 
2746 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2747    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2748     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2749     (((zzx3) & 0xF) << 12))
2750 
2751 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
2752    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2753     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2754     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
2755 
2756 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
2757    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2758     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2759     (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
2760 
2761 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2762   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2763    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2764    (((zzx0) & 0xF) << 0))
2765 
2766 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
2767    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2768     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2769     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
2770     (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
2771 
2772 #define XX______(zzx7,zzx6) \
2773    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2774 
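
/* Example of use, for illustration: a plain "mov r0, r1" assembles
   as XXXXXXXX(0xE, 0x1, 0xA, 0x0, 0 (rD), 0x0, 0x0, 1 (rM)), giving
   0xE1A00001: cond=AL, opcode=MOV, no shift, Rd=r0, Rm=r1. */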
2775 /* Generate a skeletal insn that involves an RI84 shifter operand.
2776    Returns a word which is all zeroes apart from bits 25 and 11..0,
2777    since it is those that encode the shifter operand (at least to the
2778    extent that we care about it.) */
2779 static UInt skeletal_RI84 ( ARMRI84* ri )
2780 {
2781    UInt instr;
2782    if (ri->tag == ARMri84_I84) {
2783       vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2784       vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2785       instr = 1 << 25;
2786       instr |= (ri->ARMri84.I84.imm4 << 8);
2787       instr |= ri->ARMri84.I84.imm8;
2788    } else {
2789       instr = 0 << 25;
2790       instr |= iregNo(ri->ARMri84.R.reg);
2791    }
2792    return instr;
2793 }
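
/* For instance, ARMRI84_I84(0xFF, 4) yields 0x020004FF (bit 25 set,
   imm4=4 in bits 11..8, imm8=0xFF in bits 7..0), denoting the
   immediate 0xFF ROR 8 == 0xFF000000; a register operand r3 yields
   just 0x00000003. */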
2794 
2795 /* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
2796    11..7. */
2797 static UInt skeletal_RI5 ( ARMRI5* ri )
2798 {
2799    UInt instr;
2800    if (ri->tag == ARMri5_I5) {
2801       UInt imm5 = ri->ARMri5.I5.imm5;
2802       vassert(imm5 >= 1 && imm5 <= 31);
2803       instr = 0 << 4;
2804       instr |= imm5 << 7;
2805    } else {
2806       instr = 1 << 4;
2807       instr |= iregNo(ri->ARMri5.R.reg) << 8;
2808    }
2809    return instr;
2810 }
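
/* E.g. an immediate shift of 3 gives 3 << 7 == 0x180, while a shift
   by register r2 gives (1 << 4) | (2 << 8) == 0x210. */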
2811 
2812 
2813 /* Get an immediate into a register, using only that
2814    register.  (very lame..) */
2815 static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
2816 {
2817    UInt instr;
2818    vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2819 #if 0
2820    if (0 == (imm32 & ~0xFF)) {
2821       /* mov with an immediate shifter operand of (0, imm32) (??) */
2822       instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2823       instr |= imm32;
2824       *p++ = instr;
2825    } else {
2826       // this is very bad; causes Dcache pollution
2827       // ldr  rD, [pc]
2828       instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2829       *p++ = instr;
2830       // b .+8
2831       instr = 0xEA000000;
2832       *p++ = instr;
2833       // .word imm32
2834       *p++ = imm32;
2835    }
2836 #else
2837    if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2838       /* Generate movw rD, #low16.  Then, if the high 16 are
2839          nonzero, generate movt rD, #high16. */
2840       UInt lo16 = imm32 & 0xFFFF;
2841       UInt hi16 = (imm32 >> 16) & 0xFFFF;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      if (hi16 != 0) {
         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                          hi16 & 0xF);
         *p++ = instr;
      }
   } else {
      UInt imm, rot;
      UInt op = X1010;
      UInt rN = 0;
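      /* Pre-v7 fallback: build the value one byte at a time, with a
         MOV for the first byte emitted and ORRs for the rest, each
         byte expressed as a rotated immediate.  Sketch (illustrative
         only): 0x12345678 becomes
            mov rD, #0x78
            orr rD, rD, #0x12000000
            orr rD, rD, #0x00340000
            orr rD, rD, #0x00005600
      */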
      if ((imm32 & 0xFF) || (imm32 == 0)) {
         imm = imm32 & 0xFF;
         rot = 0;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF000000) {
         imm = (imm32 >> 24) & 0xFF;
         rot = 4;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF0000) {
         imm = (imm32 >> 16) & 0xFF;
         rot = 8;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF00) {
         imm = (imm32 >> 8) & 0xFF;
         rot = 12;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
   }
#endif
   return p;
}

/* Get an immediate into a register, using only that register, and
   generating exactly 2 instructions, regardless of the value of the
   immediate. This is used when generating sections of code that need
   to be patched later, so as to guarantee a specific size. */
static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
{
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16 ;  movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      UInt instr;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                       (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                       hi16 & 0xF);
      *p++ = instr;
   } else {
      vassert(0); /* lose */
   }
   return p;
}

/* Check whether p points at a 2-insn sequence cooked up by
   imm32_to_iregNo_EXACTLY2(). */
static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
{
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16 ;  movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      UInt i0, i1;
      i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                    (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                    lo16 & 0xF);
      i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                    (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                    hi16 & 0xF);
      return p[0] == i0 && p[1] == i1;
   } else {
      vassert(0); /* lose */
   }
}


static UInt* do_load_or_store32 ( UInt* p,
                                  Bool isLoad, UInt rD, ARMAMode1* am )
{
   vassert(rD <= 12);
   vassert(am->tag == ARMam1_RI); // RR case is not handled
   UInt bB = 0;
   UInt bL = isLoad ? 1 : 0;
   Int  simm12;
   UInt instr, bP;
   if (am->ARMam1.RI.simm13 < 0) {
      bP = 0;
      simm12 = -am->ARMam1.RI.simm13;
   } else {
      bP = 1;
      simm12 = am->ARMam1.RI.simm13;
   }
   vassert(simm12 >= 0 && simm12 <= 4095);
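   /* Note that what is called bP here actually lands in the U
      (add/subtract offset) bit of the encoding; pre-indexing (P=1)
      is fixed by the 0101 nibble supplied below. */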
   instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
                    iregNo(am->ARMam1.RI.reg),
                    rD);
   instr |= simm12;
   *p++ = instr;
   return p;
}


/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
                    UChar* buf, Int nbuf, ARMInstr* i,
                    Bool mode64,
                    void* disp_cp_chain_me_to_slowEP,
                    void* disp_cp_chain_me_to_fastEP,
                    void* disp_cp_xindir,
                    void* disp_cp_xassisted )
{
   UInt* p = (UInt*)buf;
   vassert(nbuf >= 32);
   vassert(mode64 == False);
   vassert(0 == (((HWord)buf) & 3));

   switch (i->tag) {
      case ARMin_Alu: {
         UInt     instr, subopc;
         UInt     rD   = iregNo(i->ARMin.Alu.dst);
         UInt     rN   = iregNo(i->ARMin.Alu.argL);
         ARMRI84* argR = i->ARMin.Alu.argR;
         switch (i->ARMin.Alu.op) {
            case ARMalu_ADDS: /* fallthru */
            case ARMalu_ADD:  subopc = X0100; break;
            case ARMalu_ADC:  subopc = X0101; break;
            case ARMalu_SUBS: /* fallthru */
            case ARMalu_SUB:  subopc = X0010; break;
            case ARMalu_SBC:  subopc = X0110; break;
            case ARMalu_AND:  subopc = X0000; break;
            case ARMalu_BIC:  subopc = X1110; break;
            case ARMalu_OR:   subopc = X1100; break;
            case ARMalu_XOR:  subopc = X0001; break;
            default: goto bad;
         }
         instr = skeletal_RI84(argR);
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, rN, rD);
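         /* The 4-bit ALU opcode straddles a nibble boundary: its top
            bit goes in bit 24 and its low three bits in bits 23..21,
            leaving bit 20 free for the S flag, set just below. */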
         if (i->ARMin.Alu.op == ARMalu_ADDS
             || i->ARMin.Alu.op == ARMalu_SUBS) {
            instr |= 1<<20;  /* set the S bit */
         }
         *p++ = instr;
         goto done;
      }
      case ARMin_Shift: {
         UInt    instr, subopc;
         UInt    rD   = iregNo(i->ARMin.Shift.dst);
         UInt    rM   = iregNo(i->ARMin.Shift.argL);
         ARMRI5* argR = i->ARMin.Shift.argR;
         switch (i->ARMin.Shift.op) {
            case ARMsh_SHL: subopc = X0000; break;
            case ARMsh_SHR: subopc = X0001; break;
            case ARMsh_SAR: subopc = X0010; break;
            default: goto bad;
         }
         instr = skeletal_RI5(argR);
         instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
         instr |= (subopc & 3) << 5;
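         /* This is MOV rD, rM, <shift>: the shift type sits in bits
            6..5 and the shift amount comes from the RI5 skeleton. */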
         *p++ = instr;
         goto done;
      }
      case ARMin_Unary: {
         UInt instr;
         UInt rDst = iregNo(i->ARMin.Unary.dst);
         UInt rSrc = iregNo(i->ARMin.Unary.src);
         switch (i->ARMin.Unary.op) {
            case ARMun_CLZ:
               instr = XXXXXXXX(X1110,X0001,X0110,X1111,
                                rDst,X1111,X0001,rSrc);
               *p++ = instr;
               goto done;
            case ARMun_NEG: /* RSB rD,rS,#0 */
               instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
               *p++ = instr;
               goto done;
            case ARMun_NOT: {
               UInt subopc = X1111; /* MVN */
               instr = rSrc;
               instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                                 (subopc << 1) & 0xF, 0, rDst);
               *p++ = instr;
               goto done;
            }
            default:
               break;
         }
         goto bad;
      }
      case ARMin_CmpOrTst: {
         UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
         UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
         UInt SBZ    = 0;
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           ((subopc << 1) & 0xF) | 1,
                           iregNo(i->ARMin.CmpOrTst.argL), SBZ );
         *p++ = instr;
         goto done;
      }
      case ARMin_Mov: {
         UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
         UInt subopc = X1101; /* MOV */
         UInt SBZ    = 0;
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, SBZ,
                           iregNo(i->ARMin.Mov.dst));
         *p++ = instr;
         goto done;
      }
      case ARMin_Imm32: {
         p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
                                        i->ARMin.Imm32.imm32 );
         goto done;
      }
      case ARMin_LdSt32:
      case ARMin_LdSt8U: {
         UInt        bL, bB;
         HReg        rD;
         ARMAMode1*  am;
         ARMCondCode cc;
         if (i->tag == ARMin_LdSt32) {
            bB = 0;
            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
            am = i->ARMin.LdSt32.amode;
            rD = i->ARMin.LdSt32.rD;
            cc = i->ARMin.LdSt32.cc;
         } else {
            bB = 1;
            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
            am = i->ARMin.LdSt8U.amode;
            rD = i->ARMin.LdSt8U.rD;
            cc = i->ARMin.LdSt8U.cc;
         }
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam1_RI) {
            Int  simm12;
            UInt instr, bP;
            if (am->ARMam1.RI.simm13 < 0) {
               bP = 0;
               simm12 = -am->ARMam1.RI.simm13;
            } else {
               bP = 1;
               simm12 = am->ARMam1.RI.simm13;
            }
            vassert(simm12 >= 0 && simm12 <= 4095);
            instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
                             iregNo(am->ARMam1.RI.reg),
                             iregNo(rD));
            instr |= simm12;
            *p++ = instr;
            goto done;
         } else {
            // RR case
            goto bad;
         }
      }
      case ARMin_LdSt16: {
         HReg        rD = i->ARMin.LdSt16.rD;
         UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
         UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
         ARMAMode2*  am = i->ARMin.LdSt16.amode;
         ARMCondCode cc = i->ARMin.LdSt16.cc;
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam2_RI) {
            HReg rN = am->ARMam2.RI.reg;
            Int  simm8;
            UInt bP, imm8hi, imm8lo, instr;
            if (am->ARMam2.RI.simm9 < 0) {
               bP = 0;
               simm8 = -am->ARMam2.RI.simm9;
            } else {
               bP = 1;
               simm8 = am->ARMam2.RI.simm9;
            }
            vassert(simm8 >= 0 && simm8 <= 255);
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            vassert(!(bL == 0 && bS == 1)); // "! signed store"
            /**/ if (bL == 0 && bS == 0) {
               // strh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 0) {
               // ldrh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 1) {
               // ldrsh
               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1111, imm8lo);
               *p++ = instr;
               goto done;
            }
            else vassert(0); // ill-constructed insn
         } else {
            // RR case
            goto bad;
         }
      }
      case ARMin_Ld8S: {
         HReg        rD = i->ARMin.Ld8S.rD;
         ARMAMode2*  am = i->ARMin.Ld8S.amode;
         ARMCondCode cc = i->ARMin.Ld8S.cc;
         vassert(cc != ARMcc_NV);
         if (am->tag == ARMam2_RI) {
            HReg rN = am->ARMam2.RI.reg;
            Int  simm8;
            UInt bP, imm8hi, imm8lo, instr;
            if (am->ARMam2.RI.simm9 < 0) {
               bP = 0;
               simm8 = -am->ARMam2.RI.simm9;
            } else {
               bP = 1;
               simm8 = am->ARMam2.RI.simm9;
            }
            vassert(simm8 >= 0 && simm8 <= 255);
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            // ldrsb
            instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregNo(rN),
                             iregNo(rD), imm8hi, X1101, imm8lo);
            *p++ = instr;
            goto done;
         } else {
            // RR case
            goto bad;
         }
      }

      case ARMin_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with the chainXDirect_ARM and unchainXDirect_ARM below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* movw r12, lo16(dstGA) */
         /* movt r12, hi16(dstGA) */
         /* str r12, amR15T */
         p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
         p = do_load_or_store32(p, False/*!isLoad*/,
                                /*r*/12, i->ARMin.XDirect.amR15T);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (3) below. */
         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* blx  r12  (A1) */
         void* disp_cp_chain_me
                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                              : disp_cp_chain_me_to_slowEP;
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                                      (UInt)Ptr_to_ULong(disp_cp_chain_me));
         *p++ = 0xE12FFF3C;
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
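            /* The -2 allows for the branch offset being computed
               relative to PC+8, i.e. two words beyond the branch
               insn itself. */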
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_XIndir: {
         /* We're generating transfers that could lead indirectly to a
            chain-me, so we need to be sure this is actually allowed
            -- no-redir translations are not allowed to reach normal
            translations without going through the scheduler.  That
            means no XDirects or XIndirs out from no-redir
            translations.  Hence: */
         vassert(disp_cp_xindir != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XIndir.dstGA),
                                i->ARMin.XIndir.amR15T);

         /* movw r12, lo16(VG_(disp_cp_xindir)) */
         /* movt r12, hi16(VG_(disp_cp_xindir)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xindir));
         *p++ = 0xE12FFF1C;

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_XAssisted: {
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XAssisted.dstGA),
                                i->ARMin.XAssisted.amR15T);

         /* movw r8,  $magic_number */
         UInt trcval = 0;
         switch (i->ARMin.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            //case Ijk_Ret:
            //case Ijk_Call:
            /* fallthrough */
            default:
               ppIRJumpKind(i->ARMin.XAssisted.jk);
               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm32_to_iregNo(p, /*r*/8, trcval);

         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xassisted));
         *p++ = 0xE12FFF1C;

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_CMov: {
         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
         UInt subopc = X1101; /* MOV */
         UInt SBZ    = 0;
         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, SBZ,
                           iregNo(i->ARMin.CMov.dst));
         *p++ = instr;
         goto done;
      }

      case ARMin_Call: {
         UInt instr;
         /* Decide on a scratch reg used to hold the call address.
            This has to be done as per the comments in getRegUsage. */
         Int scratchNo;
         switch (i->ARMin.Call.nArgRegs) {
            case 0:  scratchNo = 0;  break;
            case 1:  scratchNo = 1;  break;
            case 2:  scratchNo = 2;  break;
            case 3:  scratchNo = 3;  break;
            case 4:  scratchNo = 11; break;
            default: vassert(0);
         }
         /* If we don't need to do any fixup actions in the case that
            the call doesn't happen, just do the simple thing and emit
            straight-line code.  We hope this is the common case. */
         if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
             || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
            // r"scratchNo" = &target
            p = imm32_to_iregNo( (UInt*)p,
                                 scratchNo, (UInt)i->ARMin.Call.target );
            // blx{cond} r"scratchNo"
            instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
                             X0011, scratchNo);
            instr |= 0xFFF << 8; // stick in the SBOnes
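            /* That forms BLX{cond} <reg> (A1): cond 0001 0010 1111
               1111 1111 0011 Rm, where bits 19..8 are should-be-one,
               hence the 0xFFF. */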
            *p++ = instr;
         } else {
            Int delta;
            /* Complex case.  We have to generate an if-then-else
               diamond. */
            // before:
            //   b{!cond} else:
            //   r"scratchNo" = &target
            //   blx{AL} r"scratchNo"
            // preElse:
            //   b after:
            // else:
            //   mov r0, #0x55555555  // possibly
            //   mov r1, r0           // possibly
            // after:

            // before:
            UInt* pBefore = p;

            //   b{!cond} else:  // ptmp1 points here
            *p++ = 0; // filled in later

            //   r"scratchNo" = &target
            p = imm32_to_iregNo( (UInt*)p,
                                 scratchNo, (UInt)i->ARMin.Call.target );

            //   blx{AL} r"scratchNo"
            instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
                             X0011, scratchNo);
            instr |= 0xFFF << 8; // stick in the SBOnes
            *p++ = instr;

            // preElse:
            UInt* pPreElse = p;

            //   b after:
            *p++ = 0; // filled in later

            // else:
            delta = (UChar*)p - (UChar*)pBefore;
            delta = (delta >> 2) - 2;
            *pBefore
               = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);

            /* Do the 'else' actions */
            switch (i->ARMin.Call.rloc.pri) {
               case RLPri_Int:
                  p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
                  break;
               case RLPri_2Int:
                  vassert(0); //ATC
                  p = imm32_to_iregNo_EXACTLY2(p, /*r*/0, 0x55555555);
                  /* mov r1, r0 */
                  *p++ = 0xE1A01000;
                  break;
               case RLPri_None: case RLPri_INVALID: default:
                  vassert(0);
            }

            // after:
            delta = (UChar*)p - (UChar*)pPreElse;
            delta = (delta >> 2) - 2;
            *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
         }

         goto done;
      }

      case ARMin_Mul: {
         /* E0000392   mul     r0, r2, r3
            E0810392   umull   r0(LO), r1(HI), r2, r3
            E0C10392   smull   r0(LO), r1(HI), r2, r3
         */
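         /* The operands are hard-wired to r0..r3; presumably (as
            with the scratch-register choice in ARMin_Call above)
            getRegUsage pins the virtual registers to those reals,
            so there are no fields to fill in here. */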
         switch (i->ARMin.Mul.op) {
            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
            default: vassert(0);
         }
         goto bad;
      }
      case ARMin_LdrEX: {
         /* E1D42F9F   ldrexb r2, [r4]
            E1F42F9F   ldrexh r2, [r4]
            E1942F9F   ldrex  r2, [r4]
            E1B42F9F   ldrexd r2, r3, [r4]
         */
         switch (i->ARMin.LdrEX.szB) {
            case 1: *p++ = 0xE1D42F9F; goto done;
            case 2: *p++ = 0xE1F42F9F; goto done;
            case 4: *p++ = 0xE1942F9F; goto done;
            case 8: *p++ = 0xE1B42F9F; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_StrEX: {
         /* E1C40F92   strexb r0, r2, [r4]
            E1E40F92   strexh r0, r2, [r4]
            E1840F92   strex  r0, r2, [r4]
            E1A40F92   strexd r0, r2, r3, [r4]
         */
         switch (i->ARMin.StrEX.szB) {
            case 1: *p++ = 0xE1C40F92; goto done;
            case 2: *p++ = 0xE1E40F92; goto done;
            case 4: *p++ = 0xE1840F92; goto done;
            case 8: *p++ = 0xE1A40F92; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_VLdStD: {
         UInt dD     = dregNo(i->ARMin.VLdStD.dD);
         UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(0 == (off8 & 3));
         off8 >>= 2;
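         /* The VFP transfer offset is a word offset: the byte offset
            must be 4-aligned and is stored scaled down by 4. */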
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VLdStS: {
         UInt fD     = fregNo(i->ARMin.VLdStS.fD);
         UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
         UInt bD     = fD & 1;
         UInt insn;
         vassert(0 == (off8 & 3));
         off8 >>= 2;
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluD: {
         UInt dN = dregNo(i->ARMin.VAluD.argL);
         UInt dD = dregNo(i->ARMin.VAluD.dst);
         UInt dM = dregNo(i->ARMin.VAluD.argR);
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluD.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
                              X1011, BITS4(0,bS,0,0), dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluS: {
         UInt dN = fregNo(i->ARMin.VAluS.argL);
         UInt dD = fregNo(i->ARMin.VAluS.dst);
         UInt dM = fregNo(i->ARMin.VAluS.argR);
         UInt bN = dN & 1;
         UInt bD = dD & 1;
         UInt bM = dM & 1;
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluS.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
                              (dN >> 1), (dD >> 1),
                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryD: {
         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryD.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryS: {
         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryS.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VCmpD: {
         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
         *p++ = insn;       /* FCMPD dD, dM */
         *p++ = 0xEEF1FA10; /* FMSTAT */
         goto done;
      }
      case ARMin_VCMovD: {
         UInt cc = (UInt)i->ARMin.VCMovD.cond;
         UInt dD = dregNo(i->ARMin.VCMovD.dst);
         UInt dM = dregNo(i->ARMin.VCMovD.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCMovS: {
         UInt cc = (UInt)i->ARMin.VCMovS.cond;
         UInt fD = fregNo(i->ARMin.VCMovS.dst);
         UInt fM = fregNo(i->ARMin.VCMovS.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
                              X0000,(fD >> 1),X1010,
                              BITS4(0,1,(fM & 1),0), (fM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtSD: {
         if (i->ARMin.VCvtSD.sToD) {
            UInt dD = dregNo(i->ARMin.VCvtSD.dst);
            UInt fM = fregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
                                 BITS4(1,1, (fM & 1), 0),
                                 (fM >> 1));
            *p++ = insn;
            goto done;
         } else {
            UInt fD = fregNo(i->ARMin.VCvtSD.dst);
            UInt dM = dregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
                                 X0111, (fD >> 1),
                                 X1011, X1100, dM);
            *p++ = insn;
            goto done;
         }
      }
      case ARMin_VXferD: {
         UInt dD  = dregNo(i->ARMin.VXferD.dD);
         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
         /* vmov dD, rLo, rHi is
            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
            vmov rLo, rHi, dD is
            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
         */
         UInt insn
            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
                       rHi, rLo, 0xB,
                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
         *p++ = insn;
         goto done;
      }
      case ARMin_VXferS: {
         UInt fD  = fregNo(i->ARMin.VXferS.fD);
         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
         /* vmov fD, rLo is
            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
            vmov rLo, fD is
            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
         */
         UInt insn
            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
                       (fD >> 1) & 0xF, rLo, 0xA,
                       BITS4((fD & 1),0,0,1), 0);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtID: {
         Bool iToD = i->ARMin.VCvtID.iToD;
         Bool syned = i->ARMin.VCvtID.syned;
         if (iToD && syned) {
            // FSITOD: I32S-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(1,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if (iToD && (!syned)) {
            // FUITOD: I32U-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(0,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && syned) {
            // FTOSID: F64-in-dreg to I32S-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1101, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && (!syned)) {
            // FTOUID: F64-in-dreg to I32U-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1100, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         vassert(0);
      }
      case ARMin_FPSCR: {
         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
         UInt iReg    = iregNo(i->ARMin.FPSCR.iReg);
         if (toFPSCR) {
            /* fmxr fpscr, iReg is EEE1 iReg A10 */
            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
            goto done;
         }
         goto bad; // FPSCR -> iReg case currently ATC
      }
      case ARMin_MFence: {
         // It's not clear (to me) how these relate to the ARMv7
         // versions, so let's just use the v7 versions as they
         // are at least well documented.
         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
         *p++ = 0xF57FF04F; /* DSB sy */
         *p++ = 0xF57FF05F; /* DMB sy */
         *p++ = 0xF57FF06F; /* ISB */
         goto done;
      }
      case ARMin_CLREX: {
         *p++ = 0xF57FF01F; /* clrex */
         goto done;
      }

      case ARMin_NLdStQ: {
         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
         UInt regN, regM;
         UInt D = regD >> 4;
         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
         regD &= 0xF;
         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X1010, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NLdStD: {
         UInt regD = dregNo(i->ARMin.NLdStD.dD);
         UInt regN, regM;
         UInt D = regD >> 4;
         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
         regD &= 0xF;
         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X0111, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NUnaryS: {
         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
         UInt regD, D;
         UInt regM, M;
         UInt size = i->ARMin.NUnaryS.size;
         UInt insn;
         UInt opc, opc1, opc2;
         switch (i->ARMin.NUnaryS.op) {
            case ARMneon_VDUP:
               if (i->ARMin.NUnaryS.size >= 16)
                  goto bad;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
                  goto bad;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.src->reg);
               D = regD >> 4;
               M = regM >> 4;
               regD &= 0xf;
               regM &= 0xf;
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
                               (i->ARMin.NUnaryS.size & 0xf), regD,
                               X1100, BITS4(0,Q,M,0), regM);
               *p++ = insn;
               goto done;
            case ARMneon_SETELEM:
               regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = iregNo(i->ARMin.NUnaryS.src->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (i->ARMin.NUnaryS.dst->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
                     break;
                  case 1:
                     if (i->ARMin.NUnaryS.dst->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
                     break;
                  case 2:
                     if (i->ARMin.NUnaryS.dst->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
                               regD, regM, X1011,
                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMU:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
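                     /* A Q register's high half lives in the
                        odd-numbered D register (qN is d2N:d2N+1),
                        hence the regM++ and index rebasing above;
                        likewise for the other sizes below and in
                        GETELEMS. */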
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     goto bad;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMS:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 2;
                     }
                     if (i->ARMin.NUnaryS.src->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            default:
               goto bad;
         }
      }
      case ARMin_NUnary: {
         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
                       : dregNo(i->ARMin.NUnary.dst);
         UInt regM, M;
         UInt D = regD >> 4;
         UInt sz1 = i->ARMin.NUnary.size >> 1;
         UInt sz2 = i->ARMin.NUnary.size & 1;
         UInt sz = i->ARMin.NUnary.size;
         UInt insn;
         UInt F = 0; /* TODO: floating point EQZ ??? */
         if (i->ARMin.NUnary.op != ARMneon_DUP) {
            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
                     ? (qregNo(i->ARMin.NUnary.src) << 1)
                     : dregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         } else {
            regM = iregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         }
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NUnary.op) {
            case ARMneon_COPY: /* VMOV reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
                               BITS4(M,Q,M,1), regM);
               break;
            case ARMneon_COPYN: /* VMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,0,M,0), regM);
               break;
            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,0,M,0), regM);
               break;
            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,1,M,0), regM);
               break;
            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,1,M,0), regM);
               break;
            case ARMneon_COPYLS: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_COPYLU: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_NOT: /* VMVN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_EQZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CNT:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CLZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_CLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_ABS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, X0011, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_DUP:
               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
               vassert(sz1 + sz2 < 2);
               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
                               X1011, BITS4(D,0,sz2,1), X0000);
               break;
            case ARMneon_REV16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_REV32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_REV64:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_PADDLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_PADDLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VQSHLNUU:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNSS:
               insn = XXXXXXXX(0xF, X0010,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNUS:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0110,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTStoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTUtoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoFixedU:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoFixedS:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedUtoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedStoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTF32toF16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VCVTF16toF32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VRECIP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRECIPF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VABSFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTEFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4205                                BITS4(1,Q,M,0), regM);
4206                break;
4207             case ARMneon_VRSQRTE:
4208                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4209                                BITS4(1,Q,M,0), regM);
4210                break;
4211             case ARMneon_VNEGF:
4212                insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4213                                BITS4(1,Q,M,0), regM);
4214                break;
4215 
4216             default:
4217                goto bad;
4218          }
4219          *p++ = insn;
4220          goto done;
4221       }
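
      /* Aside: each XXXXXXXX(n7,..,n0) call in these cases packs eight
         4-bit fields into one 32-bit instruction word, most significant
         nibble first -- presumably (n7 << 28) | (n6 << 24) | ... | n0,
         per the macro defined earlier in this file; BITS4(a,b,c,d)
         likewise assembles one such nibble from four single bits. */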
      case ARMin_NDual: {
         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
                       : dregNo(i->ARMin.NDual.arg1);
         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
                       : dregNo(i->ARMin.NDual.arg2);
         UInt D = regD >> 4;
         UInt M = regM >> 4;
         UInt sz1 = i->ARMin.NDual.size >> 1;
         UInt sz2 = i->ARMin.NDual.size & 1;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NDual.op) {
            case ARMneon_TRN: /* VTRN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0000, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_ZIP: /* VZIP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_UZP: /* VUZP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(0,Q,M,0), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NBinary: {
         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
                       : dregNo(i->ARMin.NBinary.dst);
         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
                       : dregNo(i->ARMin.NBinary.argL);
         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
                       : dregNo(i->ARMin.NBinary.argR);
         UInt sz1 = i->ARMin.NBinary.size >> 1;
         UInt sz2 = i->ARMin.NBinary.size & 1;
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NBinary.op) {
            case ARMneon_VAND: /* VAND reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VORR: /* VORR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VXOR: /* VEOR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VADD: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUB: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4 */
               if (i->ARMin.NBinary.size >= 16)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
                               regM);
               break;
            case ARMneon_VMUL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULLS:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLP:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1110, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VQDMULH:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQRDMULH:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQDMULL:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1101, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VTBL:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
                               X1000, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VPADD:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPADDFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINU:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMINS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMAXU:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VADDFP: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VABDFP: /* VABD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRECPS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCEQF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NShift: {
         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.dst) << 1)
                       : dregNo(i->ARMin.NShift.dst);
         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argL) << 1)
                       : dregNo(i->ARMin.NShift.argL);
         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argR) << 1)
                       : dregNo(i->ARMin.NShift.argR);
         UInt sz1 = i->ARMin.NShift.size >> 1;
         UInt sz2 = i->ARMin.NShift.size & 1;
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NShift.op) {
            case ARMneon_VSHL:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSAL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQSHL:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSAL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NShl64: {
         HReg regDreg = i->ARMin.NShl64.dst;
         HReg regMreg = i->ARMin.NShl64.src;
         UInt amt     = i->ARMin.NShl64.amt;
         vassert(amt >= 1 && amt <= 63);
         vassert(hregClass(regDreg) == HRcFlt64);
         vassert(hregClass(regMreg) == HRcFlt64);
         UInt regD = dregNo(regDreg);
         UInt regM = dregNo(regMreg);
         UInt D    = (regD >> 4) & 1;
         UInt Vd   = regD & 0xF;
         UInt L    = 1;
         UInt Q    = 0; /* always 64-bit */
         UInt M    = (regM >> 4) & 1;
         UInt Vm   = regM & 0xF;
         UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
                              amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
         *p++ = insn;
         goto done;
      }
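
      /* A worked reading of the NShl64 encoding above, assuming this
         is the A1 VSHL (immediate) form: with L=1 the element size is
         64 bits and the shift amount is imm6 = amt, emitted here as
         { (amt>>5)&1, (amt>>4)&1 } in the third nibble plus amt & 0xF
         in the fourth.  So amt == 33 (0b100001) would land as bits
         1,0 followed by nibble 0x1. */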
      case ARMin_NeonImm: {
         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
         UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
                          dregNo(i->ARMin.NeonImm.dst);
         UInt D = regD >> 4;
         UInt imm = i->ARMin.NeonImm.imm->imm8;
         UInt tp = i->ARMin.NeonImm.imm->type;
         UInt j = imm >> 7;
         UInt imm3 = (imm >> 4) & 0x7;
         UInt imm4 = imm & 0xF;
         UInt cmode, op;
         UInt insn;
         regD &= 0xF;
         if (tp == 9)
            op = 1;
         else
            op = 0;
         switch (tp) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
               cmode = tp << 1;
               break;
            case 9:
            case 6:
               cmode = 14;
               break;
            case 7:
               cmode = 12;
               break;
            case 8:
               cmode = 13;
               break;
            case 10:
               cmode = 15;
               break;
            default:
               vpanic("ARMin_NeonImm");
         }
         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
                         cmode, BITS4(0,Q,op,1), imm4);
         *p++ = insn;
         goto done;
      }
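
      /* In NeonImm above, the 8-bit immediate travels in the usual
         AdvSIMD scattered form: j = imm[7] rides in the second nibble,
         imm3 = imm[6:4] in the fourth, and imm4 = imm[3:0] in the
         last.  For example imm8 == 0xA5 would go out as j=1, imm3=2,
         imm4=5. */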
      case ARMin_NCMovQ: {
         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
         UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
         UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
         UInt vM = qM & 0xF;
         UInt vD = qD & 0xF;
         UInt M  = (qM >> 4) & 1;
         UInt D  = (qD >> 4) & 1;
         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
         /* b!cc here+8: !cc A00 0000 */
         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
         *p++ = insn;
         /* vmov qD, qM */
         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
         *p++ = insn;
         goto done;
      }
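
      /* The cc ^ 1 above exploits the pairing of the ARM condition
         encodings (EQ=0/NE=1, CS=2/CC=3, and so on): flipping the low
         bit yields the opposite condition, so the branch hops over the
         following vmov exactly when the conditional move must not
         happen. */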
      case ARMin_Add32: {
         UInt regD = iregNo(i->ARMin.Add32.rD);
         UInt regN = iregNo(i->ARMin.Add32.rN);
         UInt imm32 = i->ARMin.Add32.imm32;
         vassert(regD != regN);
         /* MOV regD, imm32 */
         p = imm32_to_iregNo((UInt *)p, regD, imm32);
         /* ADD regD, regN, regD */
         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
         *p++ = insn;
         goto done;
      }

      case ARMin_EvCheck: {
         /* We generate:
               ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
               subs r12, r12, #1  (A1)
               str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
               bpl  nofail
               ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
               bx   r12
              nofail:
         */
         UInt* p0 = p;
         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amCounter);
         *p++ = 0xE25CC001; /* subs r12, r12, #1 */
         p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amCounter);
         *p++ = 0x5A000001; /* bpl nofail */
         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amFailAddr);
         *p++ = 0xE12FFF1C; /* bx r12 */
         /* nofail: */

         /* Crosscheck */
         vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
         goto done;
      }

      case ARMin_ProfInc: {
         /* We generate:
              (ctrP is unknown now, so use 0x65556555 in the
              expectation that a later call to LibVEX_patchProfCtr
              will be used to fill in the immediate fields once the
              right value is known.)
            movw r12, lo16(0x65556555)
            movt r12, hi16(0x65556555)
            ldr  r11, [r12]
            adds r11, r11, #1
            str  r11, [r12]
            ldr  r11, [r12+4]
            adc  r11, r11, #0
            str  r11, [r12+4]
         */
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
         *p++ = 0xE59CB000;
         *p++ = 0xE29BB001;
         *p++ = 0xE58CB000;
         *p++ = 0xE59CB004;
         *p++ = 0xE2ABB000;
         *p++ = 0xE58CB004;
         /* Tell the caller .. */
         vassert(!(*is_profInc));
         *is_profInc = True;
         goto done;
      }
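
      /* The six fixed words in ARMin_ProfInc implement a 64-bit
         increment split across two 32-bit halves: adds bumps the low
         word and sets the carry flag, and adc then folds that carry
         into the high word.  Roughly, assuming a little-endian split
         of the ULong counter (ctr is a hypothetical UInt*):
            UInt lo = ctr[0];
            lo += 1;                       // adds: carry on wraparound
            ctr[0] = lo;
            ctr[1] += (lo == 0) ? 1 : 0;   // adc r11, r11, #0
      */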

      /* ... */
      default:
         goto bad;
    }

  bad:
   ppARMInstr(i);
   vpanic("emit_ARMInstr");
   /*NOTREACHED*/

  done:
   vassert(((UChar*)p) - &buf[0] <= 32);
   return ((UChar*)p) - &buf[0];
}


/* How big is an event check?  See case for ARMin_EvCheck in
   emit_ARMInstr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_ARM ( void )
{
   return 24;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_ARM ( void* place_to_chain,
                                 void* disp_cp_chain_me_EXPECTED,
                                 void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movw r12, lo16(disp_cp_chain_me_EXPECTED)
        movt r12, hi16(disp_cp_chain_me_EXPECTED)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   UInt* p = (UInt*)place_to_chain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm32_to_iregNo_EXACTLY2(
              p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   vassert(p[2] == 0xE12FFF3C);
   /* And what we want to change it to is either:
        (general case)
          movw r12, lo16(place_to_jump_to)
          movt r12, hi16(place_to_jump_to)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- 30 million, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */

   /* This is the delta we need to put into a B insn.  It's relative
      to the start of the next-but-one insn, hence the -8.  */
   Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)8;
   Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
   vassert(0 == (delta & (Long)3));

   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
                       "using long form\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      Int simm24 = (Int)(delta >> 2);
      vassert(simm24 == ((simm24 << 8) >> 8));
      p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
      p[1] = 0xFF000000;
      p[2] = 0xFF000000;
   } else {
      (void)imm32_to_iregNo_EXACTLY2(
               p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
      p[2] = 0xE12FFF1C;
   }

   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}
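
/* For illustration only: a sketch of how the short-form branch word
   used in chainXDirect_ARM is built.  mk_B_insn is a hypothetical
   helper, not part of this file's API; it assumes the A1 encoding of
   B, whose 24-bit signed word offset is taken relative to the
   branch's own address plus 8. */
#if 0
static UInt mk_B_insn ( UChar* from, UChar* to )
{
   Long delta  = (Long)(to - from) - 8;      /* PC reads as from+8 */
   Int  simm24 = (Int)(delta >> 2);          /* word, not byte, units */
   vassert(0 == (delta & 3));
   vassert(simm24 == ((simm24 << 8) >> 8));  /* must fit in 24 bits */
   return 0xEA000000 | (simm24 & 0x00FFFFFF);
}
#endif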


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
                                   void* place_to_jump_to_EXPECTED,
                                   void* disp_cp_chain_me )
{
   /* What we're expecting to see is:
        (general case)
          movw r12, lo16(place_to_jump_to_EXPECTED)
          movt r12, hi16(place_to_jump_to_EXPECTED)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00
   */
   UInt* p = (UInt*)place_to_unchain;
   vassert(0 == (3 & (HWord)p));

   Bool valid = False;
   if (is_imm32_to_iregNo_EXACTLY2(
          p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
       && p[2] == 0xE12FFF1C) {
      valid = True; /* it's the long form */
      if (0)
         vex_printf("QQQ unchainXDirect_ARM: found long form\n");
   } else
   if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
      /* It's the short form.  Check the displacement is right. */
      Int simm24 = p[0] & 0x00FFFFFF;
      simm24 <<= 8; simm24 >>= 8;
      if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_ARM: found short form\n");
      }
   }
   vassert(valid);

   /* And what we want to change it to is:
        movw r12, lo16(disp_cp_chain_me)
        movt r12, hi16(disp_cp_chain_me)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   (void)imm32_to_iregNo_EXACTLY2(
            p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
   p[2] = 0xE12FFF3C;
   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}
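
/* The matching decode direction, again only a sketch (hypothetical
   helper, not used by this file): recover a short-form branch target
   by sign-extending the 24-bit field and re-adding the pipeline
   offset, mirroring the check in unchainXDirect_ARM above. */
#if 0
static UChar* B_insn_target ( UInt* insn_at )
{
   Int simm24 = (Int)(insn_at[0] & 0x00FFFFFF);
   simm24 = (simm24 << 8) >> 8;              /* sign-extend 24 -> 32 */
   return (UChar*)insn_at + (simm24 << 2) + 8;
}
#endif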


/* Patch the counter address into a profile inc point, as previously
   created by the ARMin_ProfInc case for emit_ARMInstr. */
VexInvalRange patchProfInc_ARM ( void*  place_to_patch,
                                 ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 4);
   UInt* p = (UInt*)place_to_patch;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
   vassert(p[2] == 0xE59CB000);
   vassert(p[3] == 0xE29BB001);
   vassert(p[4] == 0xE58CB000);
   vassert(p[5] == 0xE59CB004);
   vassert(p[6] == 0xE2ABB000);
   vassert(p[7] == 0xE58CB004);
   imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                            (UInt)Ptr_to_ULong(location_of_counter));
   VexInvalRange vir = {(HWord)p, 8};
   return vir;
}
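
/* Note that the invalidation range returned above is only 8 bytes:
   patching rewrites just the two words produced by
   imm32_to_iregNo_EXACTLY2, while the six load/add/store words that
   the vasserts checked are left untouched. */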


#undef BITS4
#undef X0000
#undef X0001
#undef X0010
#undef X0011
#undef X0100
#undef X0101
#undef X0110
#undef X0111
#undef X1000
#undef X1001
#undef X1010
#undef X1011
#undef X1100
#undef X1101
#undef X1110
#undef X1111
#undef XXXXX___
#undef XXXXXX__
#undef XXX___XX
#undef XXXXX__X
#undef XXXXXXXX
#undef XX______

/*---------------------------------------------------------------*/
/*--- end                                     host_arm_defs.c ---*/
/*---------------------------------------------------------------*/