
/*---------------------------------------------------------------*/
/*--- begin                               guest_arm_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_arm.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm_defs.h"
#include "guest_arm64_defs.h"   /* for crypto helper functions */


/* This file contains helper functions for arm guest code.  Calls to
   these functions are generated by the back end.  These calls are of
   course in the host machine code and this file will be compiled to
   host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-arm/toIR.c.
*/
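
/* For orientation, a minimal sketch of how such a call is typically
   built on the toIR side, using the generic CCall machinery from
   libvex_ir.h.  This is illustrative only -- the variable names (op,
   dep1, dep2, ndep) are assumptions, not code copied from
   guest_arm_toIR.c:

      IRExpr** args = mkIRExprVec_4(op, dep1, dep2, ndep);
      IRExpr*  call = mkIRExprCCall(
                         Ity_I32,
                         0/*regparms*/,
                         "armg_calculate_flag_c",  // name, for printing
                         &armg_calculate_flag_c,   // host code address
                         args
                      );

   Any change to a helper's signature below must be mirrored at every
   such construction site. */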


/* Set to 1 to get detailed profiling info about individual N, Z, C
   and V flag evaluation. */
#define PROFILE_NZCV_FLAGS 0

#if PROFILE_NZCV_FLAGS

static UInt tab_n_eval[ARMG_CC_OP_NUMBER];
static UInt tab_z_eval[ARMG_CC_OP_NUMBER];
static UInt tab_c_eval[ARMG_CC_OP_NUMBER];
static UInt tab_v_eval[ARMG_CC_OP_NUMBER];
static UInt initted = 0;
static UInt tot_evals = 0;

static void initCounts ( void )
{
   UInt i;
   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
      tab_n_eval[i] = tab_z_eval[i] = tab_c_eval[i] = tab_v_eval[i] = 0;
   }
   initted = 1;
}

static void showCounts ( void )
{
   UInt i;
   vex_printf("\n                 N          Z          C          V\n");
   vex_printf(  "---------------------------------------------------\n");
   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
      vex_printf("CC_OP=%d  %9d  %9d  %9d  %9d\n",
                 i,
                 tab_n_eval[i], tab_z_eval[i],
                 tab_c_eval[i], tab_v_eval[i] );
   }
}

#define NOTE_N_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_n_eval)
#define NOTE_Z_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_z_eval)
#define NOTE_C_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_c_eval)
#define NOTE_V_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_v_eval)

#define NOTE_EVAL(_cc_op, _tab) \
   do { \
      if (!initted) initCounts(); \
      vassert( ((UInt)(_cc_op)) < ARMG_CC_OP_NUMBER); \
      _tab[(UInt)(_cc_op)]++; \
      tot_evals++; \
      if (0 == (tot_evals & 0xFFFFF)) \
         showCounts(); \
   } while (0)

#endif /* PROFILE_NZCV_FLAGS */


/* Calculate the N flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
static
UInt armg_calculate_flag_n ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_N_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt nf   = (cc_dep1 >> ARMG_CC_SHIFT_N) & 1;
         return nf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt res  = cc_dep1;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt res  = cc_dep1;
         UInt nf   = res >> 31;
         return nf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt resHi32 = cc_dep2;
         UInt nf      = resHi32 >> 31;
         return nf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_n"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flags_n");
   }
}


/* Calculate the Z flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
static
UInt armg_calculate_flag_z ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_Z_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt zf   = (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1;
         return zf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt res  = cc_dep1;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt res  = cc_dep1;
         UInt zf   = res == 0;
         return zf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt resLo32 = cc_dep1;
         UInt resHi32 = cc_dep2;
         UInt zf      = (resHi32|resLo32) == 0;
         return zf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flags_z"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flags_z");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the C flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_C_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt cf   = (cc_dep1 >> ARMG_CC_SHIFT_C) & 1;
         return cf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt cf   = res < argL;
         return cf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt cf   = argL >= argR;
         return cf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt cf   = oldC ? (res <= argL) : (res < argL);
         return cf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt cf   = oldC ? (argL >= argR) : (argL > argR);
         return cf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt shco = cc_dep2;
         vassert((shco & ~1) == 0);
         UInt cf   = shco;
         return cf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt oldC = (cc_dep3 >> 1) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt cf   = oldC;
         return cf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt oldC = (cc_dep3 >> 1) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt cf   = oldC;
         return cf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_c"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_c");
   }
}
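
/* The branchy ADC carry expression above can be cross-checked by
   redoing the arithmetic at 64 bits and inspecting bit 32.  A minimal
   disabled sketch of such a self-test follows; it is illustrative
   only, and check_adc_carry is a hypothetical name, not part of the
   VEX API. */
#if 0
static void check_adc_carry ( UInt argL, UInt argR, UInt oldC )
{
   vassert((oldC & ~1) == 0);
   /* Carry out of a 32-bit add-with-carry is bit 32 of the exact
      64-bit sum. */
   ULong wide = (ULong)argL + (ULong)argR + (ULong)oldC;
   UInt  res  = argL + argR + oldC;
   UInt  cf   = oldC ? (res <= argL) : (res < argL);
   vassert(cf == (UInt)((wide >> 32) & 1));
}
#endif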


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the V flag from the supplied thunk components, in the
   least significant bit of the word.  Returned bits 31:1 are zero. */
UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
                             UInt cc_dep2, UInt cc_dep3 )
{
#  if PROFILE_NZCV_FLAGS
   NOTE_V_EVAL(cc_op);
#  endif

   switch (cc_op) {
      case ARMG_CC_OP_COPY: {
         /* (nzcv:28x0, unused, unused) */
         UInt vf   = (cc_dep1 >> ARMG_CC_SHIFT_V) & 1;
         return vf;
      }
      case ARMG_CC_OP_ADD: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL + argR;
         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_SUB: {
         /* (argL, argR, unused) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt res  = argL - argR;
         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_ADC: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL + argR + oldC;
         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_SBB: {
         /* (argL, argR, oldC) */
         UInt argL = cc_dep1;
         UInt argR = cc_dep2;
         UInt oldC = cc_dep3;
         vassert((oldC & ~1) == 0);
         UInt res  = argL - argR - (oldC ^ 1);
         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
         return vf;
      }
      case ARMG_CC_OP_LOGIC: {
         /* (res, shco, oldV) */
         UInt oldV = cc_dep3;
         vassert((oldV & ~1) == 0);
         UInt vf   = oldV;
         return vf;
      }
      case ARMG_CC_OP_MUL: {
         /* (res, unused, oldC:oldV) */
         UInt oldV = (cc_dep3 >> 0) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt vf   = oldV;
         return vf;
      }
      case ARMG_CC_OP_MULL: {
         /* (resLo32, resHi32, oldC:oldV) */
         UInt oldV = (cc_dep3 >> 0) & 1;
         vassert((cc_dep3 & ~3) == 0);
         UInt vf   = oldV;
         return vf;
      }
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_flag_v"
                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_flag_v");
   }
}
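
/* The ADD overflow expression ((res^argL) & (res^argR)) >> 31 encodes
   "both operands have the same sign, and the result's sign differs
   from it".  A minimal disabled sketch of a cross-check against exact
   64-bit signed arithmetic; check_add_overflow is a hypothetical name
   used only for illustration. */
#if 0
static void check_add_overflow ( UInt argL, UInt argR )
{
   UInt res = argL + argR;
   UInt vf  = ((res ^ argL) & (res ^ argR)) >> 31;
   /* Signed overflow iff the exact sum doesn't fit in 32 signed bits. */
   Long exact = (Long)(Int)argL + (Long)(Int)argR;
   UInt ref   = (exact < -2147483648LL || exact > 2147483647LL) ? 1 : 0;
   vassert(vf == ref);
}
#endif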


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate NZCV from the supplied thunk components, in the positions
   they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
   Returned bits 27:0 are zero. */
UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
                                 UInt cc_dep2, UInt cc_dep3 )
{
   UInt f;
   UInt res = 0;
   f = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_N);
   f = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_Z);
   f = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_C);
   f = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
   res |= (f << ARMG_CC_SHIFT_V);
   return res;
}
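
/* A minimal disabled worked example: a CMP of equal values, encoded
   as the thunk (ARMG_CC_OP_SUB, 5, 5, 0), yields N=0 Z=1 C=1 (no
   borrow) V=0.  Assuming the N:Z:C:V shifts are 31:30:29:28 as stated
   above, that assembles to 0x60000000.  check_nzcv_example is a
   hypothetical name used only for illustration. */
#if 0
static void check_nzcv_example ( void )
{
   UInt nzcv = armg_calculate_flags_nzcv(ARMG_CC_OP_SUB, 5, 5, 0);
   vassert(nzcv == ((1u << ARMG_CC_SHIFT_Z) | (1u << ARMG_CC_SHIFT_C)));
}
#endif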


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the QC flag from the arguments, in the lowest bit
   of the word (bit 0).  Urr, having this out of line is bizarre.
   Push back inline. */
UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
                              UInt resR1, UInt resR2 )
{
   if (resL1 != resR1 || resL2 != resR2)
      return 1;
   else
      return 0;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate the specified condition from the thunk components, in the
   lowest bit of the word (bit 0).  Returned bits 31:1 are zero. */
UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
                                UInt cc_dep1,
                                UInt cc_dep2, UInt cc_dep3 )
{
   UInt cond  = cond_n_op >> 4;
   UInt cc_op = cond_n_op & 0xF;
   UInt nf, zf, vf, cf, inv;
   //   vex_printf("XXXXXXXX %x %x %x %x\n",
   //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);

   // skip flags computation in this case
   if (cond == ARMCondAL) return 1;

   inv = cond & 1;

   switch (cond) {
      case ARMCondEQ:    // Z=1         => z
      case ARMCondNE:    // Z=0
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ zf;

      case ARMCondHS:    // C=1         => c
      case ARMCondLO:    // C=0
         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ cf;

      case ARMCondMI:    // N=1         => n
      case ARMCondPL:    // N=0
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ nf;

      case ARMCondVS:    // V=1         => v
      case ARMCondVC:    // V=0
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ vf;

      case ARMCondHI:    // C=1 && Z=0   => c & ~z
      case ARMCondLS:    // C=0 || Z=1
         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & (cf & ~zf));

      case ARMCondGE:    // N=V          => ~(n^v)
      case ARMCondLT:    // N!=V
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(nf ^ vf));

      case ARMCondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
      case ARMCondLE:    // Z=1 || N!=V
         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
         return inv ^ (1 & ~(zf | (nf ^ vf)));

      case ARMCondAL: // handled above
      case ARMCondNV: // should never get here: Illegal instr
      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("armg_calculate_condition(ARM)"
                    "( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
         vpanic("armg_calculate_condition(ARM)");
   }
}
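
/* A minimal disabled sketch showing the cond_n_op packing in action:
   evaluating "LT after CMP r, #2 with r = 1".  The comparison leaves
   the thunk (ARMG_CC_OP_SUB, argL=1, argR=2, unused), and since
   1 <s 2 the condition must hold.  check_cond_example is a
   hypothetical name used only for illustration. */
#if 0
static void check_cond_example ( void )
{
   UInt r = armg_calculate_condition((ARMCondLT << 4) | ARMG_CC_OP_SUB,
                                     1, 2, 0);
   vassert(r == 1);
}
#endif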


/*---------------------------------------------------------------*/
/*--- Crypto instruction helpers                              ---*/
/*---------------------------------------------------------------*/

/* DIRTY HELPERS for doing AES support:
   * AESE (SubBytes, then ShiftRows)
   * AESD (InvShiftRows, then InvSubBytes)
   * AESMC (MixColumns)
   * AESIMC (InvMixColumns)
   These don't actually have to be dirty helpers -- they could be
   clean, but for the fact that they return a V128 and a clean helper
   can't do that.

   These just call onwards to the implementations of the same in
   guest_arm64_helpers.c.  In all of these cases, we expect |res| to
   be at least 8 aligned.
*/
/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESE (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESE(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESD (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESD(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESMC (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESMC(res, argHi, argLo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_AESIMC (
        /*OUT*/V128* res,
        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
   arm64g_dirtyhelper_AESIMC(res, argHi, argLo);
}


/* DIRTY HELPERS for the SHA instruction family.  Same comments
   as for the AES group above apply.
*/

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1C (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1C(res, argDhi, argDlo,
                            argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1P (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1P(res, argDhi, argDlo,
                            argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1M (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1M(res, argDhi, argDlo,
                            argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1SU0 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1SU0(res, argDhi, argDlo,
                              argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256H (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256H(res, argDhi, argDlo,
                              argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256H2 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256H2(res, argDhi, argDlo,
                               argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256SU1 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256SU1(res, argDhi, argDlo,
                                argNhi, argNlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1SU1 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1SU1(res, argDhi, argDlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA256SU0 (
        /*OUT*/V128* res,
        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA256SU0(res, argDhi, argDlo, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_SHA1H (
        /*OUT*/V128* res,
        UInt argM3, UInt argM2, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
}

/* CALLED FROM GENERATED CODE */
void armg_dirtyhelper_VMULLP64 (
        /*OUT*/V128* res,
        UInt argN1, UInt argN0, UInt argM1, UInt argM0
     )
{
   vassert(0 == (((HWord)res) & (8-1)));
   ULong argN = (((ULong)argN1) << 32) | ((ULong)argN0);
   ULong argM = (((ULong)argM1) << 32) | ((ULong)argM0);
   arm64g_dirtyhelper_PMULLQ(res, argN, argM);
}


/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers.    ---*/
/*--- These help iropt specialise calls the above run-time    ---*/
/*--- flags functions.                                        ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_arm_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "armg_calculate_condition" --------- */

   if (vex_streq(function_name, "armg_calculate_condition")) {

      /* specialise calls to the "armg_calculate_condition" function.
         Not sure whether this is strictly necessary, but: the
         replacement IR must produce only the values 0 or 1.  Bits
         31:1 are required to be zero. */
      IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cond_n_op = args[0]; /* (ARMCondcode << 4) | ARMG_CC_OP_* */
      cc_dep1   = args[1];
      cc_dep2   = args[2];
      cc_ndep   = args[3];

      /*---------------- SUB ----------------*/

      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_SUB)) {
         /* EQ after SUB --> test argL == argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_SUB)) {
         /* NE after SUB --> test argL != argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondGT << 4) | ARMG_CC_OP_SUB)) {
         /* GT after SUB --> test argL >s argR
                         --> test argR <s argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep2, cc_dep1));
      }
      if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_SUB)) {
         /* LE after SUB --> test argL <=s argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondLT << 4) | ARMG_CC_OP_SUB)) {
         /* LT after SUB --> test argL <s argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondGE << 4) | ARMG_CC_OP_SUB)) {
         /* GE after SUB --> test argL >=s argR
                         --> test argR <=s argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep2, cc_dep1));
      }

      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SUB)) {
         /* HS after SUB --> test argL >=u argR
                         --> test argR <=u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
      }
      if (isU32(cond_n_op, (ARMCondLO << 4) | ARMG_CC_OP_SUB)) {
         /* LO after SUB --> test argL <u argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }

      if (isU32(cond_n_op, (ARMCondLS << 4) | ARMG_CC_OP_SUB)) {
         /* LS after SUB --> test argL <=u argR */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cond_n_op, (ARMCondHI << 4) | ARMG_CC_OP_SUB)) {
         /* HI after SUB --> test argL >u argR
                         --> test argR <u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep2, cc_dep1));
      }

      /*---------------- SBB ----------------*/

      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
         /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
         /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
         /* HS after SBB (same as C after SBB below)
            --> oldC ? (argL >=u argR) : (argL >u argR)
            --> oldC ? (argR <=u argL) : (argR <u argL)
         */
         return
            IRExpr_ITE(
               binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
               /* case oldC != 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
               /* case oldC == 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
            );
      }

      /*---------------- LOGIC ----------------*/

      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
         /* EQ after LOGIC --> test res == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
         /* NE after LOGIC --> test res != 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
         /* PL after LOGIC --> test (res >> 31) == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
                           mkU32(0)));
      }
      if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
         /* MI after LOGIC --> test (res >> 31) == 1 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
                           mkU32(1)));
      }

      /*---------------- COPY ----------------*/

      /* --- 0,1 --- */
      if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_COPY)) {
         /* EQ after COPY --> (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1,
                            mkU8(ARMG_CC_SHIFT_Z)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_COPY)) {
         /* NE after COPY --> ((cc_dep1 >> ARMG_CC_SHIFT_Z) ^ 1) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Xor32,
                            binop(Iop_Shr32, cc_dep1,
                                  mkU8(ARMG_CC_SHIFT_Z)),
                            mkU32(1)),
                      mkU32(1));
      }

      /* --- 4,5 --- */
      if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_COPY)) {
         /* MI after COPY --> (cc_dep1 >> ARMG_CC_SHIFT_N) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Shr32, cc_dep1,
                            mkU8(ARMG_CC_SHIFT_N)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_COPY)) {
         /* PL after COPY --> ((cc_dep1 >> ARMG_CC_SHIFT_N) ^ 1) & 1 */
         return binop(Iop_And32,
                      binop(Iop_Xor32,
                            binop(Iop_Shr32, cc_dep1,
                                  mkU8(ARMG_CC_SHIFT_N)),
                            mkU32(1)),
                      mkU32(1));
      }

      /* --- 12,13 --- */
      if (isU32(cond_n_op, (ARMCondGT << 4) | ARMG_CC_OP_COPY)) {
         /* GT after COPY --> ((z | (n^v)) & 1) ^ 1 */
         IRExpr* n = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_N));
         IRExpr* v = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_V));
         IRExpr* z = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_Z));
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Or32, z, binop(Iop_Xor32, n, v)),
                            mkU32(1)),
                      mkU32(1));
      }
      if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_COPY)) {
         /* LE after COPY --> ((z | (n^v)) & 1) ^ 0 */
         IRExpr* n = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_N));
         IRExpr* v = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_V));
         IRExpr* z = binop(Iop_Shr32, cc_dep1, mkU8(ARMG_CC_SHIFT_Z));
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Or32, z, binop(Iop_Xor32, n, v)),
                            mkU32(1)),
                      mkU32(0));
      }

      /*----------------- AL -----------------*/

      /* A critically important case for Thumb code.

         What we're trying to spot is the case where cond_n_op is an
         expression of the form Or32(..., 0xE0) since that means the
         caller is asking for CondAL and we can simply return 1
         without caring what the ... part is.  This is a potentially
         dodgy kludge in that it assumes that the ... part has zeroes
         in bits 7:4, so that the result of the Or32 is guaranteed to
         be 0xE in bits 7:4.  Given that the places where this first
         arg is constructed (in guest_arm_toIR.c) are very
         constrained, we can get away with this.  To make this
         guaranteed safe would require a new primop, Slice44 or some
         such, thusly

            Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]

         and we would then look for Slice44(0xE0, ...) which would
         give the required safety property.

         It would be infeasibly expensive to scan backwards through
         the entire block looking for an assignment to the temp, so
         just look at the previous 16 statements.  That should find it
         if it is an interesting case, as a result of how the
         boilerplate guff at the start of each Thumb insn translation
         is made.
      */
      if (cond_n_op->tag == Iex_RdTmp) {
         Int    j;
         IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
         Int    limit    = n_precedingStmts - 16;
         if (limit < 0) limit = 0;
         if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
         for (j = n_precedingStmts - 1; j >= limit; j--) {
            IRStmt* st = precedingStmts[j];
            if (st->tag == Ist_WrTmp
                && st->Ist.WrTmp.tmp == look_for
                && st->Ist.WrTmp.data->tag == Iex_Binop
                && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
                && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
               return mkU32(1);
         }
         /* Didn't find any useful binding to the first arg
            in the previous 16 stmts. */
      }
   }

   /* --------- specialising "armg_calculate_flag_c" --------- */

   else
   if (vex_streq(function_name, "armg_calculate_flag_c")) {

      /* specialise calls to the "armg_calculate_flag_c" function.
         Note that the returned value must be either 0 or 1; nonzero
         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
         values (from the thunk) are assumed to have bits 31:1
         clear. */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0]; /* ARMG_CC_OP_* */
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
         /* Thunk args are (result, shco, oldV) */
         /* C after LOGIC --> shco */
         return cc_dep2;
      }

      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
         /* Thunk args are (argL, argR, unused) */
         /* C after SUB --> argL >=u argR
                        --> argR <=u argL */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
      }

      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
         /* This happens occasionally in softfloat code, eg __divdf3+140 */
         /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
         /* C after SBB (same as HS after SBB above)
            --> oldC ? (argL >=u argR) : (argL >u argR)
            --> oldC ? (argR <=u argL) : (argR <u argL)
         */
         return
            IRExpr_ITE(
               binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
               /* case oldC != 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
               /* case oldC == 0 */
               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
            );
      }

   }

   /* --------- specialising "armg_calculate_flag_v" --------- */

   else
   if (vex_streq(function_name, "armg_calculate_flag_v")) {

      /* specialise calls to the "armg_calculate_flag_v" function.
         Note that the returned value must be either 0 or 1; nonzero
         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
         values (from the thunk) are assumed to have bits 31:1
         clear. */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0]; /* ARMG_CC_OP_* */
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
         /* Thunk args are (result, shco, oldV) */
         /* V after LOGIC --> oldV */
         return cc_ndep;
      }

      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
         /* Thunk args are (argL, argR, unused) */
         /* V after SUB
            --> let res = argL - argR
                in ((argL ^ argR) & (argL ^ res)) >> 31
            --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
         */
         IRExpr* argL = cc_dep1;
         IRExpr* argR = cc_dep2;
         return
            binop(Iop_Shr32,
                  binop(Iop_And32,
                        binop(Iop_Xor32, argL, argR),
                        binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
                  ),
                  mkU8(31)
            );
      }

      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
         /* This happens occasionally in softfloat code, eg __divdf3+140 */
         /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
         /* V after SBB
            --> let res = argL - argR - (oldC ^ 1)
                in  (argL ^ argR) & (argL ^ res) & 1
         */
         return
            binop(
               Iop_And32,
               binop(
                  Iop_And32,
                  // argL ^ argR
                  binop(Iop_Xor32, cc_dep1, cc_dep2),
                  // argL ^ (argL - argR - (oldC ^ 1))
                  binop(Iop_Xor32,
                        cc_dep1,
                        binop(Iop_Sub32,
                              binop(Iop_Sub32, cc_dep1, cc_dep2),
                              binop(Iop_Xor32, cc_ndep, mkU32(1)))
                  )
               ),
               mkU32(1)
            );
      }

   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* VISIBLE TO LIBVEX CLIENT */
#if 0
void LibVEX_GuestARM_put_flags ( UInt flags_native,
                                 /*OUT*/VexGuestARMState* vex_state )
{
   vassert(0); // FIXME

   /* Mask out everything except N Z V C. */
   flags_native
      &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);

   vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = flags_native;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
#endif

/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestARM_get_cpsr ( /*IN*/const VexGuestARMState* vex_state )
{
   UInt cpsr = 0;
   // NZCV
   cpsr |= armg_calculate_flags_nzcv(
              vex_state->guest_CC_OP,
              vex_state->guest_CC_DEP1,
              vex_state->guest_CC_DEP2,
              vex_state->guest_CC_NDEP
           );
   vassert(0 == (cpsr & 0x0FFFFFFF));
   // Q
   if (vex_state->guest_QFLAG32 > 0)
      cpsr |= (1 << 27);
   // GE
   if (vex_state->guest_GEFLAG0 > 0)
      cpsr |= (1 << 16);
   if (vex_state->guest_GEFLAG1 > 0)
      cpsr |= (1 << 17);
   if (vex_state->guest_GEFLAG2 > 0)
      cpsr |= (1 << 18);
   if (vex_state->guest_GEFLAG3 > 0)
      cpsr |= (1 << 19);
   // M
   cpsr |= (1 << 4); // 0b10000 means user-mode
   // J,T   J (bit 24) is zero by initialisation above
   // T  we copy from R15T[0]
   if (vex_state->guest_R15T & 1)
      cpsr |= (1 << 5);
   // ITSTATE we punt on for the time being.  Could compute it
   // if needed though.
   // E, endianness, 0 (littleendian) from initialisation above
   // A,I,F disable some async exceptions.  Not sure about these.
   //       Leave as zero for the time being.
   return cpsr;
}
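
/* A minimal disabled usage sketch: on a freshly initialised guest
   state (see LibVEX_GuestARM_initialise below), NZCV/Q/GE/T are all
   clear and only the user-mode M bits are set, so the returned CPSR
   should be exactly 0x10.  example_get_cpsr is a hypothetical name
   used only for illustration. */
#if 0
static void example_get_cpsr ( void )
{
   VexGuestARMState st;
   LibVEX_GuestARM_initialise(&st);
   vassert(LibVEX_GuestARM_get_cpsr(&st) == 0x10);
}
#endif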

/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_R0  = 0;
   vex_state->guest_R1  = 0;
   vex_state->guest_R2  = 0;
   vex_state->guest_R3  = 0;
   vex_state->guest_R4  = 0;
   vex_state->guest_R5  = 0;
   vex_state->guest_R6  = 0;
   vex_state->guest_R7  = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15T = 0;  /* NB: implies ARM mode */

   vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_QFLAG32 = 0;
   vex_state->guest_GEFLAG0 = 0;
   vex_state->guest_GEFLAG1 = 0;
   vex_state->guest_GEFLAG2 = 0;
   vex_state->guest_GEFLAG3 = 0;

   vex_state->guest_EMNOTE  = EmNote_NONE;
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;
   vex_state->guest_NRADDR  = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->guest_D0  = 0;
   vex_state->guest_D1  = 0;
   vex_state->guest_D2  = 0;
   vex_state->guest_D3  = 0;
   vex_state->guest_D4  = 0;
   vex_state->guest_D5  = 0;
   vex_state->guest_D6  = 0;
   vex_state->guest_D7  = 0;
   vex_state->guest_D8  = 0;
   vex_state->guest_D9  = 0;
   vex_state->guest_D10 = 0;
   vex_state->guest_D11 = 0;
   vex_state->guest_D12 = 0;
   vex_state->guest_D13 = 0;
   vex_state->guest_D14 = 0;
   vex_state->guest_D15 = 0;
   vex_state->guest_D16 = 0;
   vex_state->guest_D17 = 0;
   vex_state->guest_D18 = 0;
   vex_state->guest_D19 = 0;
   vex_state->guest_D20 = 0;
   vex_state->guest_D21 = 0;
   vex_state->guest_D22 = 0;
   vex_state->guest_D23 = 0;
   vex_state->guest_D24 = 0;
   vex_state->guest_D25 = 0;
   vex_state->guest_D26 = 0;
   vex_state->guest_D27 = 0;
   vex_state->guest_D28 = 0;
   vex_state->guest_D29 = 0;
   vex_state->guest_D30 = 0;
   vex_state->guest_D31 = 0;

   /* ARM encoded; zero is the default as it happens (result flags
      (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
      all exns masked, all exn sticky bits cleared). */
   vex_state->guest_FPSCR = 0;

   vex_state->guest_TPIDRURO = 0;

   /* Not in a Thumb IT block. */
   vex_state->guest_ITSTATE = 0;

   vex_state->padding1 = 0;
}


/*-----------------------------------------------------------*/
/*--- Describing the arm guest state, for the benefit     ---*/
/*--- of iropt and instrumenters.                          ---*/
/*-----------------------------------------------------------*/

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   We enforce precise exns for guest R13(sp), R15T(pc), R7, R11.

   Only R13(sp) is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_arm_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int sp_min = offsetof(VexGuestARMState, guest_R13);
   Int sp_max = sp_min + 4 - 1;
   Int pc_min = offsetof(VexGuestARMState, guest_R15T);
   Int pc_max = pc_min + 4 - 1;

   if (maxoff < sp_min || minoff > sp_max) {
      /* no overlap with sp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < pc_min || minoff > pc_max) {
      /* no overlap with pc */
   } else {
      return True;
   }

   /* We appear to need precise updates of R11 in order to get proper
      stacktraces from non-optimised code. */
   Int r11_min = offsetof(VexGuestARMState, guest_R11);
   Int r11_max = r11_min + 4 - 1;

   if (maxoff < r11_min || minoff > r11_max) {
      /* no overlap with r11 */
   } else {
      return True;
   }

   /* Ditto R7, particularly needed for proper stacktraces in Thumb
      code. */
   Int r7_min = offsetof(VexGuestARMState, guest_R7);
   Int r7_max = r7_min + 4 - 1;

   if (maxoff < r7_min || minoff > r7_max) {
      /* no overlap with r7 */
   } else {
      return True;
   }

   return False;
}
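
/* A minimal disabled sketch of the intended behaviour: writes
   overlapping R13(sp) force precise exceptions in every mode, while
   writes touching only (say) R0 never do.  example_precise is a
   hypothetical name used only for illustration. */
#if 0
static void example_precise ( VexRegisterUpdates pxControl )
{
   Int r0  = offsetof(VexGuestARMState, guest_R0);
   Int r13 = offsetof(VexGuestARMState, guest_R13);
   vassert( guest_arm_state_requires_precise_mem_exns(r13, r13+3,
                                                      pxControl));
   vassert(!guest_arm_state_requires_precise_mem_exns(r0,  r0+3,
                                                      pxControl));
}
#endif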


#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestARMState, field),             \
      (sizeof ((VexGuestARMState*)0)->field) }

VexGuestLayout
   armGuest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestARMState),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestARMState,guest_R13),
          .sizeof_SP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestARMState,guest_R15T),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 10,

          /* flags thunk: OP is always defd, whereas DEP1 and DEP2
             have to be tracked.  See detailed comment in gdefs.h on
             meaning of thunk fields. */
          .alwaysDefd
             = { /* 0 */ ALWAYSDEFD(guest_R15T),
                 /* 1 */ ALWAYSDEFD(guest_CC_OP),
                 /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
                 /* 3 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 4 */ ALWAYSDEFD(guest_CMSTART),
                 /* 5 */ ALWAYSDEFD(guest_CMLEN),
                 /* 6 */ ALWAYSDEFD(guest_NRADDR),
                 /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
                 /* 8 */ ALWAYSDEFD(guest_TPIDRURO),
                 /* 9 */ ALWAYSDEFD(guest_ITSTATE)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                                 guest_arm_helpers.c ---*/
/*---------------------------------------------------------------*/