1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_arm64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_basictypes.h"
32 #include "libvex_emnote.h"
33 #include "libvex_guest_arm64.h"
34 #include "libvex_ir.h"
35 #include "libvex.h"
36
37 #include "main_util.h"
38 #include "main_globals.h"
39 #include "guest_generic_bb_to_IR.h"
40 #include "guest_arm64_defs.h"
41
42
43 /* This file contains helper functions for arm64 guest code.  Calls to
44 these functions are generated by the back end. These calls are of
45 course in the host machine code and this file will be compiled to
46 host machine code, so that all makes sense.
47
48 Only change the signatures of these helper functions very
49 carefully. If you change the signature here, you'll have to change
50 the parameters passed to it in the IR calls constructed by
51 guest_arm64_toIR.c.
52 */
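/* Illustrative sketch only (not part of the build): on the IR side,
   guest_arm64_toIR.c constructs calls to these helpers roughly as
   shown below, using the standard IR call builders.  The exact
   argument marshalling is owned by that file, not this one.

      IRExpr** args = mkIRExprVec_4(cond_n_op, cc_dep1, cc_dep2, cc_dep3);
      IRExpr*  call
         = mkIRExprCCall(Ity_I64, 0/*regparms*/,
                         "arm64g_calculate_condition",
                         &arm64g_calculate_condition, args);
*/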
53
54
55 /* Set to 1 to get detailed profiling info about individual N, Z, C
56 and V flag evaluation. */
57 #define PROFILE_NZCV_FLAGS 0
58
59 #if PROFILE_NZCV_FLAGS
60
61 static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
62 static UInt initted = 0;
63 static UInt tot_evals = 0;
64
65 static void initCounts ( void )
66 {
67 UInt i, j;
68 for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
69 for (j = 0; j < 16; j++) {
70 tab_eval[i][j] = 0;
71 }
72 }
73 initted = 1;
74 }
75
76 static void showCounts ( void )
77 {
78 const HChar* nameCC[16]
79 = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
80 "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
81 UInt i, j;
82 ULong sum = 0;
83 vex_printf("\nCC_OP 0 1 2 3 "
84 " 4 5 6\n");
85 vex_printf( "--------------------------------------------------"
86 "--------------------------\n");
87 for (j = 0; j < 16; j++) {
88 vex_printf("%2d %s ", j, nameCC[j]);
89 for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
90 vex_printf("%9d ", tab_eval[i][j]);
91 sum += tab_eval[i][j];
92 }
93 vex_printf("\n");
94 }
95 vex_printf("(In total %llu calls)\n", sum);
96 }
97
98 #define NOTE_EVAL(_cc_op, _cond) \
99 do { \
100 if (!initted) initCounts(); \
101 vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
102 vassert( ((UInt)(_cond)) < 16); \
103       tab_eval[(UInt)(_cc_op)][(UInt)(_cond)]++; \
104 tot_evals++; \
105 if (0 == (tot_evals & 0x7FFF)) \
106 showCounts(); \
107 } while (0)
108
109 #endif /* PROFILE_NZCV_FLAGS */
110
111
112 /* Calculate the N flag from the supplied thunk components, in the
113 least significant bit of the word. Returned bits 63:1 are zero. */
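/* Worked example (illustrative): for a 32-bit ADDS the translator
   stores the thunk as (cc_op=ARM64G_CC_OP_ADD32, cc_dep1=argL,
   cc_dep2=argR, cc_dep3 unused).  This helper simply recomputes the
   result and extracts its top bit, so argL=0x7FFFFFFF, argR=1 gives
   res=0x80000000 and hence N=1. */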
114 static
115 ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
116 ULong cc_dep2, ULong cc_dep3 )
117 {
118 switch (cc_op) {
119 case ARM64G_CC_OP_COPY: {
120 /* (nzcv:28x0, unused, unused) */
121 ULong nf = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
122 return nf;
123 }
124 case ARM64G_CC_OP_ADD32: {
125 /* (argL, argR, unused) */
126 UInt argL = (UInt)cc_dep1;
127 UInt argR = (UInt)cc_dep2;
128 UInt res = argL + argR;
129 ULong nf = (ULong)(res >> 31);
130 return nf;
131 }
132 case ARM64G_CC_OP_ADD64: {
133 /* (argL, argR, unused) */
134 ULong argL = cc_dep1;
135 ULong argR = cc_dep2;
136 ULong res = argL + argR;
137 ULong nf = (ULong)(res >> 63);
138 return nf;
139 }
140 case ARM64G_CC_OP_SUB32: {
141 /* (argL, argR, unused) */
142 UInt argL = (UInt)cc_dep1;
143 UInt argR = (UInt)cc_dep2;
144 UInt res = argL - argR;
145 ULong nf = (ULong)(res >> 31);
146 return nf;
147 }
148 case ARM64G_CC_OP_SUB64: {
149 /* (argL, argR, unused) */
150 ULong argL = cc_dep1;
151 ULong argR = cc_dep2;
152 ULong res = argL - argR;
153 ULong nf = res >> 63;
154 return nf;
155 }
156 case ARM64G_CC_OP_ADC32: {
157 /* (argL, argR, oldC) */
158 UInt argL = cc_dep1;
159 UInt argR = cc_dep2;
160 UInt oldC = cc_dep3;
161 vassert((oldC & ~1) == 0);
162 UInt res = argL + argR + oldC;
163 ULong nf = (ULong)(res >> 31);
164 return nf;
165 }
166 case ARM64G_CC_OP_ADC64: {
167 /* (argL, argR, oldC) */
168 ULong argL = cc_dep1;
169 ULong argR = cc_dep2;
170 ULong oldC = cc_dep3;
171 vassert((oldC & ~1) == 0);
172 ULong res = argL + argR + oldC;
173 ULong nf = res >> 63;
174 return nf;
175 }
176 case ARM64G_CC_OP_SBC32: {
177 /* (argL, argR, oldC) */
178 UInt argL = cc_dep1;
179 UInt argR = cc_dep2;
180 UInt oldC = cc_dep3;
181 vassert((oldC & ~1) == 0);
182 UInt res = argL - argR - (oldC ^ 1);
183 ULong nf = (ULong)(res >> 31);
184 return nf;
185 }
186 case ARM64G_CC_OP_SBC64: {
187 /* (argL, argR, oldC) */
188 ULong argL = cc_dep1;
189 ULong argR = cc_dep2;
190 ULong oldC = cc_dep3;
191 vassert((oldC & ~1) == 0);
192 ULong res = argL - argR - (oldC ^ 1);
193 ULong nf = res >> 63;
194 return nf;
195 }
196 case ARM64G_CC_OP_LOGIC32: {
197 /* (res, unused, unused) */
198 UInt res = (UInt)cc_dep1;
199 ULong nf = res >> 31;
200 return nf;
201 }
202 case ARM64G_CC_OP_LOGIC64: {
203 /* (res, unused, unused) */
204 ULong res = cc_dep1;
205 ULong nf = res >> 63;
206 return nf;
207 }
208 //ZZ case ARMG_CC_OP_MUL: {
209 //ZZ /* (res, unused, oldC:oldV) */
210 //ZZ UInt res = cc_dep1;
211 //ZZ UInt nf = res >> 31;
212 //ZZ return nf;
213 //ZZ }
214 //ZZ case ARMG_CC_OP_MULL: {
215 //ZZ /* (resLo32, resHi32, oldC:oldV) */
216 //ZZ UInt resHi32 = cc_dep2;
217 //ZZ UInt nf = resHi32 >> 31;
218 //ZZ return nf;
219 //ZZ }
220 default:
221 /* shouldn't really make these calls from generated code */
222 vex_printf("arm64g_calculate_flag_n"
223 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
224 cc_op, cc_dep1, cc_dep2, cc_dep3 );
225 vpanic("arm64g_calculate_flag_n");
226 }
227 }
228
229
230 /* Calculate the Z flag from the supplied thunk components, in the
231 least significant bit of the word. Returned bits 63:1 are zero. */
232 static
233 ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
234 ULong cc_dep2, ULong cc_dep3 )
235 {
236 switch (cc_op) {
237 case ARM64G_CC_OP_COPY: {
238 /* (nzcv:28x0, unused, unused) */
239 ULong zf = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
240 return zf;
241 }
242 case ARM64G_CC_OP_ADD32: {
243 /* (argL, argR, unused) */
244 UInt argL = (UInt)cc_dep1;
245 UInt argR = (UInt)cc_dep2;
246 UInt res = argL + argR;
247 ULong zf = res == 0;
248 return zf;
249 }
250 case ARM64G_CC_OP_ADD64: {
251 /* (argL, argR, unused) */
252 ULong argL = cc_dep1;
253 ULong argR = cc_dep2;
254 ULong res = argL + argR;
255 ULong zf = res == 0;
256 return zf;
257 }
258 case ARM64G_CC_OP_SUB32: {
259 /* (argL, argR, unused) */
260 UInt argL = (UInt)cc_dep1;
261 UInt argR = (UInt)cc_dep2;
262 UInt res = argL - argR;
263 ULong zf = res == 0;
264 return zf;
265 }
266 case ARM64G_CC_OP_SUB64: {
267 /* (argL, argR, unused) */
268 ULong argL = cc_dep1;
269 ULong argR = cc_dep2;
270 ULong res = argL - argR;
271 ULong zf = res == 0;
272 return zf;
273 }
274 case ARM64G_CC_OP_ADC32: {
275 /* (argL, argR, oldC) */
276 UInt argL = cc_dep1;
277 UInt argR = cc_dep2;
278 UInt oldC = cc_dep3;
279 vassert((oldC & ~1) == 0);
280 UInt res = argL + argR + oldC;
281 ULong zf = res == 0;
282 return zf;
283 }
284 case ARM64G_CC_OP_ADC64: {
285 /* (argL, argR, oldC) */
286 ULong argL = cc_dep1;
287 ULong argR = cc_dep2;
288 ULong oldC = cc_dep3;
289 vassert((oldC & ~1) == 0);
290 ULong res = argL + argR + oldC;
291 ULong zf = res == 0;
292 return zf;
293 }
294 case ARM64G_CC_OP_SBC32: {
295 /* (argL, argR, oldC) */
296 UInt argL = cc_dep1;
297 UInt argR = cc_dep2;
298 UInt oldC = cc_dep3;
299 vassert((oldC & ~1) == 0);
300 UInt res = argL - argR - (oldC ^ 1);
301 ULong zf = res == 0;
302 return zf;
303 }
304 case ARM64G_CC_OP_SBC64: {
305 /* (argL, argR, oldC) */
306 ULong argL = cc_dep1;
307 ULong argR = cc_dep2;
308 ULong oldC = cc_dep3;
309 vassert((oldC & ~1) == 0);
310 ULong res = argL - argR - (oldC ^ 1);
311 ULong zf = res == 0;
312 return zf;
313 }
314 case ARM64G_CC_OP_LOGIC32: {
315 /* (res, unused, unused) */
316 UInt res = (UInt)cc_dep1;
317 ULong zf = res == 0;
318 return zf;
319 }
320 case ARM64G_CC_OP_LOGIC64: {
321 /* (res, unused, unused) */
322 ULong res = cc_dep1;
323 ULong zf = res == 0;
324 return zf;
325 }
326 //ZZ case ARMG_CC_OP_MUL: {
327 //ZZ /* (res, unused, oldC:oldV) */
328 //ZZ UInt res = cc_dep1;
329 //ZZ UInt zf = res == 0;
330 //ZZ return zf;
331 //ZZ }
332 //ZZ case ARMG_CC_OP_MULL: {
333 //ZZ /* (resLo32, resHi32, oldC:oldV) */
334 //ZZ UInt resLo32 = cc_dep1;
335 //ZZ UInt resHi32 = cc_dep2;
336 //ZZ UInt zf = (resHi32|resLo32) == 0;
337 //ZZ return zf;
338 //ZZ }
339 default:
340 /* shouldn't really make these calls from generated code */
341 vex_printf("arm64g_calculate_flag_z"
342 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
343 cc_op, cc_dep1, cc_dep2, cc_dep3 );
344 vpanic("arm64g_calculate_flag_z");
345 }
346 }
347
348
349 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
350 /* Calculate the C flag from the supplied thunk components, in the
351 least significant bit of the word. Returned bits 63:1 are zero. */
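/* Note on the ADC cases below: once an incoming carry may be 1, the
   carry-out of argL + argR + oldC can no longer be recovered as just
   "res < argL", because res == argL also indicates a carry (argR +
   oldC wrapped to exactly 2^32 or 2^64).  Hence the
   oldC ? (res <= argL) : (res < argL) form.  32-bit example:
   argL=5, argR=0xFFFFFFFF, oldC=1 gives res=5, so res == argL and the
   carry-out is correctly reported as 1. */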
352 ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
353 ULong cc_dep2, ULong cc_dep3 )
354 {
355 switch (cc_op) {
356 case ARM64G_CC_OP_COPY: {
357 /* (nzcv:28x0, unused, unused) */
358 ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
359 return cf;
360 }
361 case ARM64G_CC_OP_ADD32: {
362 /* (argL, argR, unused) */
363 UInt argL = (UInt)cc_dep1;
364 UInt argR = (UInt)cc_dep2;
365 UInt res = argL + argR;
366 ULong cf = res < argL;
367 return cf;
368 }
369 case ARM64G_CC_OP_ADD64: {
370 /* (argL, argR, unused) */
371 ULong argL = cc_dep1;
372 ULong argR = cc_dep2;
373 ULong res = argL + argR;
374 ULong cf = res < argL;
375 return cf;
376 }
377 case ARM64G_CC_OP_SUB32: {
378 /* (argL, argR, unused) */
379 UInt argL = (UInt)cc_dep1;
380 UInt argR = (UInt)cc_dep2;
381 ULong cf = argL >= argR;
382 return cf;
383 }
384 case ARM64G_CC_OP_SUB64: {
385 /* (argL, argR, unused) */
386 ULong argL = cc_dep1;
387 ULong argR = cc_dep2;
388 ULong cf = argL >= argR;
389 return cf;
390 }
391 case ARM64G_CC_OP_ADC32: {
392 /* (argL, argR, oldC) */
393 UInt argL = cc_dep1;
394 UInt argR = cc_dep2;
395 UInt oldC = cc_dep3;
396 vassert((oldC & ~1) == 0);
397 UInt res = argL + argR + oldC;
398 ULong cf = oldC ? (res <= argL) : (res < argL);
399 return cf;
400 }
401 case ARM64G_CC_OP_ADC64: {
402 /* (argL, argR, oldC) */
403 ULong argL = cc_dep1;
404 ULong argR = cc_dep2;
405 ULong oldC = cc_dep3;
406 vassert((oldC & ~1) == 0);
407 ULong res = argL + argR + oldC;
408 ULong cf = oldC ? (res <= argL) : (res < argL);
409 return cf;
410 }
411 case ARM64G_CC_OP_SBC32: {
412 /* (argL, argR, oldC) */
413 UInt argL = cc_dep1;
414 UInt argR = cc_dep2;
415 UInt oldC = cc_dep3;
416 vassert((oldC & ~1) == 0);
417 ULong cf = oldC ? (argL >= argR) : (argL > argR);
418 return cf;
419 }
420 case ARM64G_CC_OP_SBC64: {
421 /* (argL, argR, oldC) */
422 ULong argL = cc_dep1;
423 ULong argR = cc_dep2;
424 ULong oldC = cc_dep3;
425 vassert((oldC & ~1) == 0);
426 ULong cf = oldC ? (argL >= argR) : (argL > argR);
427 return cf;
428 }
429 case ARM64G_CC_OP_LOGIC32:
430 case ARM64G_CC_OP_LOGIC64: {
431 /* (res, unused, unused) */
432 return 0; // C after logic is zero on arm64
433 }
434 //ZZ case ARMG_CC_OP_MUL: {
435 //ZZ /* (res, unused, oldC:oldV) */
436 //ZZ UInt oldC = (cc_dep3 >> 1) & 1;
437 //ZZ vassert((cc_dep3 & ~3) == 0);
438 //ZZ UInt cf = oldC;
439 //ZZ return cf;
440 //ZZ }
441 //ZZ case ARMG_CC_OP_MULL: {
442 //ZZ /* (resLo32, resHi32, oldC:oldV) */
443 //ZZ UInt oldC = (cc_dep3 >> 1) & 1;
444 //ZZ vassert((cc_dep3 & ~3) == 0);
445 //ZZ UInt cf = oldC;
446 //ZZ return cf;
447 //ZZ }
448 default:
449 /* shouldn't really make these calls from generated code */
450 vex_printf("arm64g_calculate_flag_c"
451 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
452 cc_op, cc_dep1, cc_dep2, cc_dep3 );
453 vpanic("arm64g_calculate_flag_c");
454 }
455 }
456
457
458 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
459 /* Calculate the V flag from the supplied thunk components, in the
460 least significant bit of the word. Returned bits 63:1 are zero. */
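/* The expressions below use the usual two's-complement overflow
   identities: for addition, overflow occurred iff the result's sign
   differs from the sign of both operands, i.e. the top bit of
   (res ^ argL) & (res ^ argR); for subtraction, iff the operands'
   signs differ and the result's sign differs from argL's, i.e. the
   top bit of (argL ^ argR) & (argL ^ res).  For example,
   0x7FFFFFFF + 1 = 0x80000000 sets V in the ADD32 case. */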
461 static
462 ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
463 ULong cc_dep2, ULong cc_dep3 )
464 {
465 switch (cc_op) {
466 case ARM64G_CC_OP_COPY: {
467 /* (nzcv:28x0, unused, unused) */
468 ULong vf = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
469 return vf;
470 }
471 case ARM64G_CC_OP_ADD32: {
472 /* (argL, argR, unused) */
473 UInt argL = (UInt)cc_dep1;
474 UInt argR = (UInt)cc_dep2;
475 UInt res = argL + argR;
476 ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
477 return vf;
478 }
479 case ARM64G_CC_OP_ADD64: {
480 /* (argL, argR, unused) */
481 ULong argL = cc_dep1;
482 ULong argR = cc_dep2;
483 ULong res = argL + argR;
484 ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
485 return vf;
486 }
487 case ARM64G_CC_OP_SUB32: {
488 /* (argL, argR, unused) */
489 UInt argL = (UInt)cc_dep1;
490 UInt argR = (UInt)cc_dep2;
491 UInt res = argL - argR;
492 ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
493 return vf;
494 }
495 case ARM64G_CC_OP_SUB64: {
496 /* (argL, argR, unused) */
497 ULong argL = cc_dep1;
498 ULong argR = cc_dep2;
499 ULong res = argL - argR;
500 ULong vf = (((argL ^ argR) & (argL ^ res))) >> 63;
501 return vf;
502 }
503 case ARM64G_CC_OP_ADC32: {
504 /* (argL, argR, oldC) */
505 UInt argL = cc_dep1;
506 UInt argR = cc_dep2;
507 UInt oldC = cc_dep3;
508 vassert((oldC & ~1) == 0);
509 UInt res = argL + argR + oldC;
510 ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
511 return vf;
512 }
513 case ARM64G_CC_OP_ADC64: {
514 /* (argL, argR, oldC) */
515 ULong argL = cc_dep1;
516 ULong argR = cc_dep2;
517 ULong oldC = cc_dep3;
518 vassert((oldC & ~1) == 0);
519 ULong res = argL + argR + oldC;
520 ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
521 return vf;
522 }
523 case ARM64G_CC_OP_SBC32: {
524 /* (argL, argR, oldC) */
525 UInt argL = cc_dep1;
526 UInt argR = cc_dep2;
527 UInt oldC = cc_dep3;
528 vassert((oldC & ~1) == 0);
529 UInt res = argL - argR - (oldC ^ 1);
530 ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
531 return vf;
532 }
533 case ARM64G_CC_OP_SBC64: {
534 /* (argL, argR, oldC) */
535 ULong argL = cc_dep1;
536 ULong argR = cc_dep2;
537 ULong oldC = cc_dep3;
538 vassert((oldC & ~1) == 0);
539 ULong res = argL - argR - (oldC ^ 1);
540 ULong vf = ((argL ^ argR) & (argL ^ res)) >> 63;
541 return vf;
542 }
543 case ARM64G_CC_OP_LOGIC32:
544 case ARM64G_CC_OP_LOGIC64: {
545 /* (res, unused, unused) */
546 return 0; // V after logic is zero on arm64
547 }
548 //ZZ case ARMG_CC_OP_MUL: {
549 //ZZ /* (res, unused, oldC:oldV) */
550 //ZZ UInt oldV = (cc_dep3 >> 0) & 1;
551 //ZZ vassert((cc_dep3 & ~3) == 0);
552 //ZZ UInt vf = oldV;
553 //ZZ return vf;
554 //ZZ }
555 //ZZ case ARMG_CC_OP_MULL: {
556 //ZZ /* (resLo32, resHi32, oldC:oldV) */
557 //ZZ UInt oldV = (cc_dep3 >> 0) & 1;
558 //ZZ vassert((cc_dep3 & ~3) == 0);
559 //ZZ UInt vf = oldV;
560 //ZZ return vf;
561 //ZZ }
562 default:
563 /* shouldn't really make these calls from generated code */
564 vex_printf("arm64g_calculate_flag_v"
565 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
566 cc_op, cc_dep1, cc_dep2, cc_dep3 );
567 vpanic("arm64g_calculate_flag_v");
568 }
569 }
570
571
572 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
573 /* Calculate NZCV from the supplied thunk components, in the positions
574 they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
575 Returned bits 27:0 are zero. */
576 ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
577 ULong cc_dep2, ULong cc_dep3 )
578 {
579 ULong f;
580 ULong res = 0;
581 f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
582 res |= (f << ARM64G_CC_SHIFT_N);
583 f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
584 res |= (f << ARM64G_CC_SHIFT_Z);
585 f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
586 res |= (f << ARM64G_CC_SHIFT_C);
587 f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
588 res |= (f << ARM64G_CC_SHIFT_V);
589 return res;
590 }
591
592 //ZZ
593 //ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
594 //ZZ /* Calculate the QC flag from the arguments, in the lowest bit
595 //ZZ of the word (bit 0). Urr, having this out of line is bizarre.
596 //ZZ Push back inline. */
597 //ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
598 //ZZ UInt resR1, UInt resR2 )
599 //ZZ {
600 //ZZ if (resL1 != resR1 || resL2 != resR2)
601 //ZZ return 1;
602 //ZZ else
603 //ZZ return 0;
604 //ZZ }
605
606 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
607 /* Calculate the specified condition from the thunk components, in the
608 lowest bit of the word (bit 0). Returned bits 63:1 are zero. */
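/* Encoding reminder: ARM64 condition codes come in inverted pairs
   (EQ/NE, CS/CC, ..., AL/NV) with the inverted member having bit 0
   set.  The switch below therefore evaluates the "positive" member of
   each pair and XORs in (cond & 1).  For instance,
   cond_n_op = (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64 evaluates Z for
   the SUB64 thunk and returns zf ^ 1. */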
609 ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
610 ULong cond_n_op ,
611 ULong cc_dep1,
612 ULong cc_dep2, ULong cc_dep3 )
613 {
614 ULong cond = cond_n_op >> 4;
615 ULong cc_op = cond_n_op & 0xF;
616 ULong inv = cond & 1;
617 ULong nf, zf, vf, cf;
618
619 # if PROFILE_NZCV_FLAGS
620 NOTE_EVAL(cc_op, cond);
621 # endif
622
623 // vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
624 // cond_n_op, cc_dep1, cc_dep2, cc_dep3);
625
626 switch (cond) {
627 case ARM64CondEQ: // Z=1 => z
628 case ARM64CondNE: // Z=0
629 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
630 return inv ^ zf;
631
632 case ARM64CondCS: // C=1 => c
633 case ARM64CondCC: // C=0
634 cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
635 return inv ^ cf;
636
637 case ARM64CondMI: // N=1 => n
638 case ARM64CondPL: // N=0
639 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
640 return inv ^ nf;
641
642 case ARM64CondVS: // V=1 => v
643 case ARM64CondVC: // V=0
644 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
645 return inv ^ vf;
646
647 case ARM64CondHI: // C=1 && Z=0 => c & ~z
648 case ARM64CondLS: // C=0 || Z=1
649 cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
650 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
651 return inv ^ (1 & (cf & ~zf));
652
653 case ARM64CondGE: // N=V => ~(n^v)
654 case ARM64CondLT: // N!=V
655 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
656 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
657 return inv ^ (1 & ~(nf ^ vf));
658
659 case ARM64CondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v))
660 case ARM64CondLE: // Z=1 || N!=V
661 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
662 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
663 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
664 return inv ^ (1 & ~(zf | (nf ^ vf)));
665
666 case ARM64CondAL: // 1
667 case ARM64CondNV: // 1
668 return 1;
669
670 default:
671 /* shouldn't really make these calls from generated code */
672 vex_printf("arm64g_calculate_condition(ARM64)"
673 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
674 cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
675 vpanic("armg_calculate_condition(ARM64)");
676 }
677 }
678
679
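/* The eight CRC helpers below run the LSB-first (bit-reflected) CRC
   algorithm one bit at a time: 0xEDB88320 is the reflected CRC-32
   (IEEE 802.3) polynomial used by CRC32B/H/W/X, and 0x82F63B78 is the
   reflected CRC-32C (Castagnoli) polynomial used by CRC32CB/CH/CW/CX.
   Illustrative self-check (sketch, not part of the build): feeding
   the bytes of "123456789" through arm64g_calc_crc32b, starting from
   an all-ones accumulator and inverting at the end, should give the
   standard CRC-32 check value.

      ULong acc = 0xFFFFFFFFULL;
      for (const UChar* p = (const UChar*)"123456789"; *p; p++)
         acc = arm64g_calc_crc32b(acc, *p);
      // expect (acc ^ 0xFFFFFFFFULL) == 0xCBF43926
*/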
680 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
681 ULong arm64g_calc_crc32b ( ULong acc, ULong bits )
682 {
683 UInt i;
684 ULong crc = (bits & 0xFFULL) ^ acc;
685 for (i = 0; i < 8; i++)
686 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
687 return crc;
688 }
689
690 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
691 ULong arm64g_calc_crc32h ( ULong acc, ULong bits )
692 {
693 UInt i;
694 ULong crc = (bits & 0xFFFFULL) ^ acc;
695 for (i = 0; i < 16; i++)
696 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
697 return crc;
698 }
699
700 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
701 ULong arm64g_calc_crc32w ( ULong acc, ULong bits )
702 {
703 UInt i;
704 ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
705 for (i = 0; i < 32; i++)
706 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
707 return crc;
708 }
709
710 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
711 ULong arm64g_calc_crc32x ( ULong acc, ULong bits )
712 {
713 UInt i;
714 ULong crc = bits ^ acc;
715 for (i = 0; i < 64; i++)
716 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
717 return crc;
718
719 }
720
721 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
722 ULong arm64g_calc_crc32cb ( ULong acc, ULong bits )
723 {
724 UInt i;
725 ULong crc = (bits & 0xFFULL) ^ acc;
726 for (i = 0; i < 8; i++)
727 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
728 return crc;
729 }
730
731 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
732 ULong arm64g_calc_crc32ch ( ULong acc, ULong bits )
733 {
734 UInt i;
735 ULong crc = (bits & 0xFFFFULL) ^ acc;
736 for (i = 0; i < 16; i++)
737 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
738 return crc;
739 }
740
741 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
742 ULong arm64g_calc_crc32cw ( ULong acc, ULong bits )
743 {
744 UInt i;
745 ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
746 for (i = 0; i < 32; i++)
747 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
748 return crc;
749 }
750
751 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
752 ULong arm64g_calc_crc32cx ( ULong acc, ULong bits )
753 {
754 UInt i;
755 ULong crc = bits ^ acc;
756 for (i = 0; i < 64; i++)
757 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
758 return crc;
759 }
760
761
762 /* CALLED FROM GENERATED CODE */
763 /* DIRTY HELPER (non-referentially-transparent) */
764 /* Horrible hack. On non-arm64 platforms, return 0. */
765 ULong arm64g_dirtyhelper_MRS_CNTVCT_EL0 ( void )
766 {
767 # if defined(__aarch64__) && !defined(__arm__)
768 ULong w = 0x5555555555555555ULL; /* overwritten */
769 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(w));
770 return w;
771 # else
772 return 0ULL;
773 # endif
774 }
775
776
777 /* CALLED FROM GENERATED CODE */
778 /* DIRTY HELPER (non-referentially-transparent) */
779 /* Horrible hack. On non-arm64 platforms, return 0. */
780 ULong arm64g_dirtyhelper_MRS_CNTFRQ_EL0 ( void )
781 {
782 # if defined(__aarch64__) && !defined(__arm__)
783 ULong w = 0x5555555555555555ULL; /* overwritten */
784 __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(w));
785 return w;
786 # else
787 return 0ULL;
788 # endif
789 }
790
791
792 void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res, ULong arg1, ULong arg2 )
793 {
794 /* This doesn't need to be a dirty helper, except for the fact that
795 a clean helper can't return a 128 bit value. This is a pretty
796 lame implementation of PMULLQ, but at least it doesn't contain any
797 data dependent branches, and has lots of ILP. I guess we could unroll
798 the loop completely and offer extensive prayers to the gods of ILP
799 if more performance is needed. */
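   /* Tiny worked example (carry-less multiply over GF(2)[x]): with
      arg1 = 0b11 and arg2 = 0b11 the loop XORs in arg2 and then
      (arg2 << 1), giving accLo = 0b101 -- that is,
      (x + 1) * (x + 1) = x^2 + 1, with no carries between bits. */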
800 UInt i;
801 ULong accHi = 0, accLo = 0;
802 ULong op2Hi = 0, op2Lo = arg2;
803 for (i = 0; i < 64; i++) {
804 /* Make |mask| be all 0s or all 1s, a copy of arg1[i] */
805 Long mask = arg1 << (63-i);
806 mask >>= 63;
807 accHi ^= (op2Hi & mask);
808 accLo ^= (op2Lo & mask);
809 /* do: op2Hi:op2Lo <<=u 1 */
810 op2Hi <<= 1;
811 op2Hi |= ((op2Lo >> 63) & 1);
812 op2Lo <<= 1;
813 }
814 res->w64[1] = accHi;
815 res->w64[0] = accLo;
816 }
817
818
819 /*---------------------------------------------------------------*/
820 /*--- Crypto instruction helpers ---*/
821 /*---------------------------------------------------------------*/
822
823 /* DIRTY HELPERS for doing AES support:
824 * AESE (SubBytes, then ShiftRows)
825 * AESD (InvShiftRows, then InvSubBytes)
826 * AESMC (MixColumns)
827 * AESIMC (InvMixColumns)
828 These don't actually have to be dirty helpers -- they could be
829 clean, but for the fact that they return a V128 and a clean helper
830 can't do that.
831
832 The ARMv8 manual seems to imply that AESE first performs ShiftRows,
833 then SubBytes. This seems to contradict FIPS 197, so the
834 implementation below is consistent with FIPS 197. One can observe
835 that the two transformations commute -- the order in which they
836 happen makes no difference to the result. So the ambiguity doesn't
837 actually matter, but it is confusing. The v8 manual looks correct
838 about AESD, though.
839
840 The three functions rj_xtime, aesMixColumn and aesInvMixColumn only,
841 are taken from "A byte-oriented AES-256 implementation" and are subject
842 to the following usage terms:
843
844 Byte-oriented AES-256 implementation.
845 All lookup tables replaced with 'on the fly' calculations.
846
847 Copyright (c) 2007-2011 Ilya O. Levin, http://www.literatecode.com
848 Other contributors: Hal Finney
849
850 Permission to use, copy, modify, and distribute this software for any
851 purpose with or without fee is hereby granted, provided that the above
852 copyright notice and this permission notice appear in all copies.
853
854 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
855 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
856 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
857 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
858 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
859 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
860 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
861 */
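/* Why the SubBytes/ShiftRows ordering ambiguity above is harmless:
   ShiftRows merely permutes the 16 state bytes, while SubBytes maps
   each byte through the S-box independently of its position.  So for
   any state s and byte index i,
      SubBytes(ShiftRows(s))[i] = SBox[ s[perm(i)] ]
                                = ShiftRows(SubBytes(s))[i]
   and the two orderings produce identical results. */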
862
863 const UChar aesMapSubBytes[256]
864 = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
865 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
866 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
867 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
868 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
869 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
870 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
871 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
872 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
873 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
874 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
875 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
876 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
877 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
878 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
879 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
880 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
881 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
882 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
883 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
884 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
885 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
886 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
887 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
888 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
889 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
890 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
891 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
892 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
893 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
894 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
895 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
896 };
897
898 const UChar aesMapInvSubBytes[256]
899 = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
900 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
901 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
902 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
903 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
904 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
905 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
906 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
907 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
908 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
909 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
910 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
911 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
912 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
913 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
914 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
915 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
916 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
917 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
918 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
919 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
920 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
921 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
922 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
923 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
924 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
925 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
926 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
927 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
928 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
929 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
930 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
931 };
932
933 static inline UChar rj_xtime ( UChar x )
934 {
935 UChar y = (UChar)(x << 1);
936 return (x & 0x80) ? (y ^ 0x1b) : y;
937 }
938
939 static void aesMixColumn ( /*MOD*/UChar* r )
940 {
941 UChar a = r[0];
942 UChar b = r[1];
943 UChar c = r[2];
944 UChar d = r[3];
945 UChar e = a ^ b ^ c ^ d;
946 r[0] ^= e ^ rj_xtime(a ^ b);
947 r[1] ^= e ^ rj_xtime(b ^ c);
948 r[2] ^= e ^ rj_xtime(c ^ d);
949 r[3] ^= e ^ rj_xtime(d ^ a);
950 }
951
952 static void aesInvMixColumn ( /*MOD*/UChar* r )
953 {
954 UChar a = r[0];
955 UChar b = r[1];
956 UChar c = r[2];
957 UChar d = r[3];
958 UChar e = a ^ b ^ c ^ d;
959 UChar z = rj_xtime(e);
960 UChar x = e ^ rj_xtime(rj_xtime(z ^ a ^ c));
961 UChar y = e ^ rj_xtime(rj_xtime(z ^ b ^ d));
962 r[0] ^= x ^ rj_xtime(a ^ b);
963 r[1] ^= y ^ rj_xtime(b ^ c);
964 r[2] ^= x ^ rj_xtime(c ^ d);
965 r[3] ^= y ^ rj_xtime(d ^ a);
966 }
967
968
969 /* CALLED FROM GENERATED CODE */
970 void arm64g_dirtyhelper_AESE ( /*OUT*/V128* res, ULong argHi, ULong argLo )
971 {
972 res->w64[1] = argHi;
973 res->w64[0] = argLo;
974
975 /* First do SubBytes on the State. */
976 UInt i;
977 for (i = 0; i < 16; i++) {
978 res->w8[i] = aesMapSubBytes[res->w8[i] & 0xFF];
979 }
980
981 /* Then do ShiftRows on the State. */
982 # define XX(_ix) res->w8[_ix]
983 { UChar old1 = XX(1);
984 XX(1) = XX(5); XX(5) = XX(9); XX(9) = XX(13); XX(13) = old1;
985 }
986 { UChar old2 = XX(2); UChar old6 = XX(6);
987 XX(2) = XX(10); XX(6) = XX(14); XX(10) = old2; XX(14) = old6;
988 }
989 { UChar old15 = XX(15);
990 XX(15) = XX(11); XX(11) = XX(7); XX(7) = XX(3); XX(3) = old15;
991 }
992 # undef XX
993 }
994
995
996 /* CALLED FROM GENERATED CODE */
997 void arm64g_dirtyhelper_AESD ( /*OUT*/V128* res, ULong argHi, ULong argLo )
998 {
999 res->w64[1] = argHi;
1000 res->w64[0] = argLo;
1001
1002 /* First do InvShiftRows on the State. */
1003 # define XX(_ix) res->w8[_ix]
1004 { UChar old13 = XX(13);
1005 XX(13) = XX(9); XX(9) = XX(5); XX(5) = XX(1); XX(1) = old13;
1006 }
1007 { UChar old14 = XX(14); UChar old10 = XX(10);
1008 XX(14) = XX(6); XX(10) = XX(2); XX(6) = old14; XX(2) = old10;
1009 }
1010 { UChar old3 = XX(3);
1011 XX(3) = XX(7); XX(7) = XX(11); XX(11) = XX(15); XX(15) = old3;
1012 }
1013 # undef XX
1014
1015 /* Then do InvSubBytes on the State. */
1016 UInt i;
1017 for (i = 0; i < 16; i++) {
1018 res->w8[i] = aesMapInvSubBytes[res->w8[i] & 0xFF];
1019 }
1020 }
1021
1022
1023 /* CALLED FROM GENERATED CODE */
1024 void arm64g_dirtyhelper_AESMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1025 {
1026 res->w64[1] = argHi;
1027 res->w64[0] = argLo;
1028 aesMixColumn(&res->w8[0]);
1029 aesMixColumn(&res->w8[4]);
1030 aesMixColumn(&res->w8[8]);
1031 aesMixColumn(&res->w8[12]);
1032 }
1033
1034
1035 /* CALLED FROM GENERATED CODE */
1036 void arm64g_dirtyhelper_AESIMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1037 {
1038 res->w64[1] = argHi;
1039 res->w64[0] = argLo;
1040 aesInvMixColumn(&res->w8[0]);
1041 aesInvMixColumn(&res->w8[4]);
1042 aesInvMixColumn(&res->w8[8]);
1043 aesInvMixColumn(&res->w8[12]);
1044 }
1045
1046
1047 /* DIRTY HELPERS for SHA instruction support. As with the AES helpers
1048 above, these are actually pure functions and are only dirty because
1049 clean helpers can't return a V128. */
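/* Naming note: the SHA1C/SHA1P/SHA1M helpers below differ only in
   which SHA-1 round function feeds the update -- Choose for SHA1C,
   Parity for SHA1P, Majority for SHA1M -- matching the ARMv8
   pseudocode for those instructions.  The SHA256* helpers likewise
   follow the SHA-256 round and message-schedule functions. */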
1050
1051 static inline UInt ROL32 ( UInt x, UInt sh ) {
1052 vassert(sh > 0 && sh < 32);
1053 return (x << sh) | (x >> (32 - sh));
1054 }
1055
1056 static inline UInt ROR32 ( UInt x, UInt sh ) {
1057 vassert(sh > 0 && sh < 32);
1058 return (x >> sh) | (x << (32 - sh));
1059 }
1060
1061 static inline UInt SHAchoose ( UInt x, UInt y, UInt z ) {
1062 return ((y ^ z) & x) ^ z;
1063 }
1064
1065 static inline UInt SHAmajority ( UInt x, UInt y, UInt z ) {
1066 return (x & y) | ((x | y) & z);
1067 }
1068
1069 static inline UInt SHAparity ( UInt x, UInt y, UInt z ) {
1070 return x ^ y ^ z;
1071 }
1072
1073 static inline UInt SHAhashSIGMA0 ( UInt x ) {
1074 return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
1075 }
1076
1077 static inline UInt SHAhashSIGMA1 ( UInt x ) {
1078 return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
1079 }
1080
1081 static void SHA256hash ( /*MOD*/V128* X, /*MOD*/V128* Y, const V128* W )
1082 {
1083 UInt e;
1084 for (e = 0; e <= 3; e++) {
1085 UInt chs = SHAchoose(Y->w32[0], Y->w32[1], Y->w32[2]);
1086 UInt maj = SHAmajority(X->w32[0], X->w32[1], X->w32[2]);
1087 UInt t = Y->w32[3] + SHAhashSIGMA1(Y->w32[0]) + chs + W->w32[e];
1088 X->w32[3] = t + X->w32[3];
1089 Y->w32[3] = t + SHAhashSIGMA0(X->w32[0]) + maj;
1090 UInt ts = Y->w32[3];
1091 Y->w32[3] = Y->w32[2];
1092 Y->w32[2] = Y->w32[1];
1093 Y->w32[1] = Y->w32[0];
1094 Y->w32[0] = X->w32[3];
1095 X->w32[3] = X->w32[2];
1096 X->w32[2] = X->w32[1];
1097 X->w32[1] = X->w32[0];
1098 X->w32[0] = ts;
1099 }
1100 }
1101
1102 /* CALLED FROM GENERATED CODE */
1103 void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1104 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1105 {
1106 vassert(nHi == 0);
1107 vassert((nLo >> 32) == 0);
1108 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1109 UInt Y; Y = (UInt)nLo;
1110 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1111 UInt e;
1112 for (e = 0; e <= 3; e++) {
1113 UInt t = SHAchoose(X.w32[1], X.w32[2], X.w32[3]);
1114 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1115 X.w32[1] = ROL32(X.w32[1], 30);
1116 UInt oldY = Y;
1117 Y = X.w32[3];
1118 X.w32[3] = X.w32[2];
1119 X.w32[2] = X.w32[1];
1120 X.w32[1] = X.w32[0];
1121 X.w32[0] = oldY;
1122 }
1123 res->w64[1] = X.w64[1];
1124 res->w64[0] = X.w64[0];
1125 }
1126
1127 /* CALLED FROM GENERATED CODE */
1128 void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res, ULong nHi, ULong nLo )
1129 {
1130 vassert(nHi == 0);
1131 vassert((nLo >> 32) == 0);
1132 res->w32[3] = res->w32[2] = res->w32[1] = 0;
1133 res->w32[0] = ROL32((UInt)nLo, 30);
1134 }
1135
1136 /* CALLED FROM GENERATED CODE */
1137 void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1138 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1139 {
1140 vassert(nHi == 0);
1141 vassert((nLo >> 32) == 0);
1142 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1143 UInt Y; Y = (UInt)nLo;
1144 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1145 UInt e;
1146 for (e = 0; e <= 3; e++) {
1147 UInt t = SHAmajority(X.w32[1], X.w32[2], X.w32[3]);
1148 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1149 X.w32[1] = ROL32(X.w32[1], 30);
1150 UInt oldY = Y;
1151 Y = X.w32[3];
1152 X.w32[3] = X.w32[2];
1153 X.w32[2] = X.w32[1];
1154 X.w32[1] = X.w32[0];
1155 X.w32[0] = oldY;
1156 }
1157 res->w64[1] = X.w64[1];
1158 res->w64[0] = X.w64[0];
1159 }
1160
1161 /* CALLED FROM GENERATED CODE */
1162 void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1163 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1164 {
1165 vassert(nHi == 0);
1166 vassert((nLo >> 32) == 0);
1167 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1168 UInt Y; Y = (UInt)nLo;
1169 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1170 UInt e;
1171 for (e = 0; e <= 3; e++) {
1172 UInt t = SHAparity(X.w32[1], X.w32[2], X.w32[3]);
1173 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1174 X.w32[1] = ROL32(X.w32[1], 30);
1175 UInt oldY = Y;
1176 Y = X.w32[3];
1177 X.w32[3] = X.w32[2];
1178 X.w32[2] = X.w32[1];
1179 X.w32[1] = X.w32[0];
1180 X.w32[0] = oldY;
1181 }
1182 res->w64[1] = X.w64[1];
1183 res->w64[0] = X.w64[0];
1184 }
1185
1186 /* CALLED FROM GENERATED CODE */
1187 void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1188 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1189 {
1190 res->w64[1] = nLo;
1191 res->w64[0] = dHi;
1192 res->w64[1] ^= dHi ^ mHi;
1193 res->w64[0] ^= dLo ^ mLo;
1194 }
1195
1196 /* CALLED FROM GENERATED CODE */
1197 void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1198 ULong nHi, ULong nLo )
1199 {
1200 /* This computes "T = Vd ^ (Vn >>u 32)" */
1201 V128 T; T.w64[1] = nHi; T.w64[0] = nLo;
1202 T.w32[0] = T.w32[1];
1203 T.w32[1] = T.w32[2];
1204 T.w32[2] = T.w32[3];
1205 T.w32[3] = 0;
1206 T.w64[1] ^= dHi;
1207 T.w64[0] ^= dLo;
1208 /* */
1209 res->w32[0] = ROL32(T.w32[0], 1);
1210 res->w32[1] = ROL32(T.w32[1], 1);
1211 res->w32[2] = ROL32(T.w32[2], 1);
1212 res->w32[3] = ROL32(T.w32[3], 1) ^ ROL32(T.w32[0], 2);
1213 }
1214
1215 /* CALLED FROM GENERATED CODE */
1216 void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1217 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1218 {
1219 V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
1220 V128 Y; Y.w64[1] = dHi; Y.w64[0] = dLo;
1221 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1222 SHA256hash(&X, &Y, &W);
1223 res->w64[1] = Y.w64[1];
1224 res->w64[0] = Y.w64[0];
1225 }
1226
1227 /* CALLED FROM GENERATED CODE */
1228 void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1229 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1230 {
1231 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1232 V128 Y; Y.w64[1] = nHi; Y.w64[0] = nLo;
1233 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1234 SHA256hash(&X, &Y, &W);
1235 res->w64[1] = X.w64[1];
1236 res->w64[0] = X.w64[0];
1237 }
1238
1239 /* CALLED FROM GENERATED CODE */
1240 void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1241 ULong nHi, ULong nLo )
1242
1243 {
1244 res->w64[1] = res->w64[0] = 0;
1245 V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1246 V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1247 V128 T;
1248 T.w32[3] = op2.w32[0];
1249 T.w32[2] = op1.w32[3];
1250 T.w32[1] = op1.w32[2];
1251 T.w32[0] = op1.w32[1];
1252 UInt e;
1253 for (e = 0; e <= 3; e++) {
1254 UInt elt = T.w32[e];
1255 elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ (elt >> 3);
1256 res->w32[e] = elt + op1.w32[e];
1257 }
1258 }
1259
1260 /* CALLED FROM GENERATED CODE */
1261 void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1262 ULong nHi, ULong nLo,
1263 ULong mHi, ULong mLo )
1264 {
1265 res->w64[0] = res->w64[1] = 0;
1266 V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1267 V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1268 V128 op3; op3.w64[1] = mHi; op3.w64[0] = mLo;
1269 V128 T0;
1270 T0.w32[3] = op3.w32[0];
1271 T0.w32[2] = op2.w32[3];
1272 T0.w32[1] = op2.w32[2];
1273 T0.w32[0] = op2.w32[1];
1274 UInt T1[2];
1275 UInt e;
1276 T1[1] = op3.w32[3];
1277 T1[0] = op3.w32[2];
1278 for (e = 0; e <= 1; e++) {
1279 UInt elt = T1[e];
1280 elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1281 elt = elt + op1.w32[e] + T0.w32[e];
1282 res->w32[e] = elt;
1283 }
1284 T1[1] = res->w32[1];
1285 T1[0] = res->w32[0];
1286 for (e = 2; e <= 3; e++) {
1287 UInt elt = T1[e-2];
1288 elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1289 elt = elt + op1.w32[e] + T0.w32[e];
1290 res->w32[e] = elt;
1291 }
1292 }
1293
1294
1295 /*---------------------------------------------------------------*/
1296 /*--- Flag-helpers translation-time function specialisers. ---*/
1297 /*--- These help iropt specialise calls the above run-time ---*/
1298 /*--- flags functions. ---*/
1299 /*---------------------------------------------------------------*/
1300
1301 /* Used by the optimiser to try specialisations. Returns an
1302 equivalent expression, or NULL if none. */
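/* Illustrative example of the kind of rewrite performed here (sketch
   only): a thunk evaluation such as

      arm64g_calculate_condition( (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64,
                                  argL, argR, unused )

   is replaced by the equivalent, much cheaper IR

      1Uto64( CmpEQ64(argL, argR) )

   which iropt can then fold further, so the flag thunk never needs to
   be materialised at all. */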
1303
1304 static Bool isU64 ( IRExpr* e, ULong n )
1305 {
1306 return
1307 toBool( e->tag == Iex_Const
1308 && e->Iex.Const.con->tag == Ico_U64
1309 && e->Iex.Const.con->Ico.U64 == n );
1310 }
1311
1312 IRExpr* guest_arm64_spechelper ( const HChar* function_name,
1313 IRExpr** args,
1314 IRStmt** precedingStmts,
1315 Int n_precedingStmts )
1316 {
1317 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1318 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1319 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1320 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1321
1322 Int i, arity = 0;
1323 for (i = 0; args[i]; i++)
1324 arity++;
1325 //ZZ # if 0
1326 //ZZ vex_printf("spec request:\n");
1327 //ZZ vex_printf(" %s ", function_name);
1328 //ZZ for (i = 0; i < arity; i++) {
1329 //ZZ vex_printf(" ");
1330 //ZZ ppIRExpr(args[i]);
1331 //ZZ }
1332 //ZZ vex_printf("\n");
1333 //ZZ # endif
1334
1335 /* --------- specialising "arm64g_calculate_condition" --------- */
1336
1337 if (vex_streq(function_name, "arm64g_calculate_condition")) {
1338
1339 /* specialise calls to the "arm64g_calculate_condition" function.
1340 Not sure whether this is strictly necessary, but: the
1341 replacement IR must produce only the values 0 or 1. Bits
1342 63:1 are required to be zero. */
1343 IRExpr *cond_n_op, *cc_dep1, *cc_dep2 ; //, *cc_ndep;
1344 vassert(arity == 4);
1345 cond_n_op = args[0]; /* (ARM64Condcode << 4) | ARM64G_CC_OP_* */
1346 cc_dep1 = args[1];
1347 cc_dep2 = args[2];
1348 //cc_ndep = args[3];
1349
1350 /*---------------- SUB64 ----------------*/
1351
1352 /* 0, 1 */
1353 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
1354 /* EQ after SUB --> test argL == argR */
1355 return unop(Iop_1Uto64,
1356 binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1357 }
1358 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
1359 /* NE after SUB --> test argL != argR */
1360 return unop(Iop_1Uto64,
1361 binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1362 }
1363
1364 /* 2, 3 */
1365 if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
1366 /* CS after SUB --> test argL >=u argR
1367 --> test argR <=u argL */
1368 return unop(Iop_1Uto64,
1369 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1370 }
1371 if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
1372 /* CC after SUB --> test argL <u argR */
1373 return unop(Iop_1Uto64,
1374 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1375 }
1376
1377 /* 8, 9 */
1378 if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
1379 /* LS after SUB --> test argL <=u argR */
1380 return unop(Iop_1Uto64,
1381 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1382 }
1383 if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
1384 /* HI after SUB --> test argL >u argR
1385 --> test argR <u argL */
1386 return unop(Iop_1Uto64,
1387 binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1388 }
1389
1390 /* 10, 11 */
1391 if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
1392 /* LT after SUB --> test argL <s argR */
1393 return unop(Iop_1Uto64,
1394 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1395 }
1396 if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
1397 /* GE after SUB --> test argL >=s argR
1398 --> test argR <=s argL */
1399 return unop(Iop_1Uto64,
1400 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1401 }
1402
1403 /* 12, 13 */
1404 if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
1405 /* GT after SUB --> test argL >s argR
1406 --> test argR <s argL */
1407 return unop(Iop_1Uto64,
1408 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1409 }
1410 if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
1411 /* LE after SUB --> test argL <=s argR */
1412 return unop(Iop_1Uto64,
1413 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1414 }
1415
1416 /*---------------- SUB32 ----------------*/
1417
1418 /* 0, 1 */
1419 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
1420 /* EQ after SUB --> test argL == argR */
1421 return unop(Iop_1Uto64,
1422 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1423 unop(Iop_64to32, cc_dep2)));
1424 }
1425 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
1426 /* NE after SUB --> test argL != argR */
1427 return unop(Iop_1Uto64,
1428 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1429 unop(Iop_64to32, cc_dep2)));
1430 }
1431
1432 /* 2, 3 */
1433 if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
1434 /* CS after SUB --> test argL >=u argR
1435 --> test argR <=u argL */
1436 return unop(Iop_1Uto64,
1437 binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
1438 unop(Iop_64to32, cc_dep1)));
1439 }
1440 if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
1441 /* CC after SUB --> test argL <u argR */
1442 return unop(Iop_1Uto64,
1443 binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
1444 unop(Iop_64to32, cc_dep2)));
1445 }
1446
1447 /* 8, 9 */
1448 if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
1449 /* LS after SUB --> test argL <=u argR */
1450 return unop(Iop_1Uto64,
1451 binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
1452 unop(Iop_64to32, cc_dep2)));
1453 }
1454 if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
1455 /* HI after SUB --> test argL >u argR
1456 --> test argR <u argL */
1457 return unop(Iop_1Uto64,
1458 binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
1459 unop(Iop_64to32, cc_dep1)));
1460 }
1461
1462 /* 10, 11 */
1463 if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
1464 /* LT after SUB --> test argL <s argR */
1465 return unop(Iop_1Uto64,
1466 binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
1467 unop(Iop_64to32, cc_dep2)));
1468 }
1469 if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
1470 /* GE after SUB --> test argL >=s argR
1471 --> test argR <=s argL */
1472 return unop(Iop_1Uto64,
1473 binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
1474 unop(Iop_64to32, cc_dep1)));
1475 }
1476
1477 /* 12, 13 */
1478 if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
1479 /* GT after SUB --> test argL >s argR
1480 --> test argR <s argL */
1481 return unop(Iop_1Uto64,
1482 binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
1483 unop(Iop_64to32, cc_dep1)));
1484 }
1485 if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
1486 /* LE after SUB --> test argL <=s argR */
1487 return unop(Iop_1Uto64,
1488 binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
1489 unop(Iop_64to32, cc_dep2)));
1490 }
1491
1492 //ZZ /*---------------- SBB ----------------*/
1493 //ZZ
1494 //ZZ if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
1495 //ZZ /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
1496 //ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1497 //ZZ /* HS after SBB (same as C after SBB below)
1498 //ZZ --> oldC ? (argL >=u argR) : (argL >u argR)
1499 //ZZ --> oldC ? (argR <=u argL) : (argR <u argL)
1500 //ZZ */
1501 //ZZ return
1502 //ZZ IRExpr_ITE(
1503 //ZZ binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
1504 //ZZ /* case oldC != 0 */
1505 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
1506 //ZZ /* case oldC == 0 */
1507 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
1508 //ZZ );
1509 //ZZ }
1510 //ZZ
1511 //ZZ /*---------------- LOGIC ----------------*/
1512 //ZZ
1513 //ZZ if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
1514 //ZZ /* EQ after LOGIC --> test res == 0 */
1515 //ZZ return unop(Iop_1Uto32,
1516 //ZZ binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1517 //ZZ }
1518 //ZZ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
1519 //ZZ /* NE after LOGIC --> test res != 0 */
1520 //ZZ return unop(Iop_1Uto32,
1521 //ZZ binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
1522 //ZZ }
1523 //ZZ
1524 //ZZ if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
1525 //ZZ /* PL after LOGIC --> test (res >> 31) == 0 */
1526 //ZZ return unop(Iop_1Uto32,
1527 //ZZ binop(Iop_CmpEQ32,
1528 //ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
1529 //ZZ mkU32(0)));
1530 //ZZ }
1531 //ZZ if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
1532 //ZZ /* MI after LOGIC --> test (res >> 31) == 1 */
1533 //ZZ return unop(Iop_1Uto32,
1534 //ZZ binop(Iop_CmpEQ32,
1535 //ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
1536 //ZZ mkU32(1)));
1537 //ZZ }
1538
1539 /*---------------- COPY ----------------*/
1540
1541 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
1542 /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
1543 return binop(Iop_And64,
1544 binop(Iop_Shr64, cc_dep1,
1545 mkU8(ARM64G_CC_SHIFT_Z)),
1546 mkU64(1));
1547 }
1548 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
1549 /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
1550 return binop(Iop_And64,
1551 binop(Iop_Xor64,
1552 binop(Iop_Shr64, cc_dep1,
1553 mkU8(ARM64G_CC_SHIFT_Z)),
1554 mkU64(1)),
1555 mkU64(1));
1556 }
1557
1558 //ZZ /*----------------- AL -----------------*/
1559 //ZZ
1560 //ZZ /* A critically important case for Thumb code.
1561 //ZZ
1562 //ZZ What we're trying to spot is the case where cond_n_op is an
1563 //ZZ expression of the form Or32(..., 0xE0) since that means the
1564 //ZZ caller is asking for CondAL and we can simply return 1
1565 //ZZ without caring what the ... part is. This is a potentially
1566 //ZZ dodgy kludge in that it assumes that the ... part has zeroes
1567 //ZZ in bits 7:4, so that the result of the Or32 is guaranteed to
1568 //ZZ be 0xE in bits 7:4. Given that the places where this first
1569 //ZZ arg are constructed (in guest_arm_toIR.c) are very
1570 //ZZ constrained, we can get away with this. To make this
1571 //ZZ guaranteed safe would require to have a new primop, Slice44
1572 //ZZ or some such, thusly
1573 //ZZ
1574 //ZZ Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
1575 //ZZ
1576 //ZZ and we would then look for Slice44(0xE0, ...)
1577 //ZZ which would give the required safety property.
1578 //ZZ
1579 //ZZ It would be infeasibly expensive to scan backwards through
1580 //ZZ the entire block looking for an assignment to the temp, so
1581 //ZZ just look at the previous 16 statements. That should find it
1582 //ZZ if it is an interesting case, as a result of how the
1583 //ZZ boilerplate guff at the start of each Thumb insn translation
1584 //ZZ is made.
1585 //ZZ */
1586 //ZZ if (cond_n_op->tag == Iex_RdTmp) {
1587 //ZZ Int j;
//ZZ         IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
//ZZ         Int    limit    = n_precedingStmts - 16;
//ZZ         if (limit < 0) limit = 0;
//ZZ         if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
//ZZ         for (j = n_precedingStmts - 1; j >= limit; j--) {
//ZZ            IRStmt* st = precedingStmts[j];
//ZZ            if (st->tag == Ist_WrTmp
//ZZ                && st->Ist.WrTmp.tmp == look_for
//ZZ                && st->Ist.WrTmp.data->tag == Iex_Binop
//ZZ                && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
//ZZ                && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
//ZZ               return mkU32(1);
//ZZ         }
//ZZ         /* Didn't find any useful binding to the first arg
//ZZ            in the previous 16 stmts. */
//ZZ      }
   }

//ZZ    /* --------- specialising "armg_calculate_flag_c" --------- */
//ZZ
//ZZ    else
//ZZ    if (vex_streq(function_name, "armg_calculate_flag_c")) {
//ZZ
//ZZ       /* specialise calls to the "armg_calculate_flag_c" function.
//ZZ          Note that the returned value must be either 0 or 1; nonzero
//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
//ZZ          values (from the thunk) are assumed to have bits 31:1
//ZZ          clear. */
//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
//ZZ       vassert(arity == 4);
//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
//ZZ       cc_dep1 = args[1];
//ZZ       cc_dep2 = args[2];
//ZZ       cc_ndep = args[3];
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
//ZZ          /* Thunk args are (result, shco, oldV) */
//ZZ          /* C after LOGIC --> shco */
//ZZ          return cc_dep2;
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
//ZZ          /* Thunk args are (argL, argR, unused) */
//ZZ          /* C after SUB --> argL >=u argR
//ZZ                        --> argR <=u argL */
//ZZ          return unop(Iop_1Uto32,
//ZZ                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
//ZZ          /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
//ZZ          /* C after SBB (same as HS after SBB above)
//ZZ             --> oldC ? (argL >=u argR) : (argL >u argR)
//ZZ             --> oldC ? (argR <=u argL) : (argR <u argL)
//ZZ          */
//ZZ          return
//ZZ             IRExpr_ITE(
//ZZ                binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
//ZZ                /* case oldC != 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
//ZZ                /* case oldC == 0 */
//ZZ                unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
//ZZ             );
//ZZ       }
//ZZ
//ZZ    }
//ZZ
//ZZ    /* --------- specialising "armg_calculate_flag_v" --------- */
//ZZ
//ZZ    else
//ZZ    if (vex_streq(function_name, "armg_calculate_flag_v")) {
//ZZ
//ZZ       /* specialise calls to the "armg_calculate_flag_v" function.
//ZZ          Note that the returned value must be either 0 or 1; nonzero
//ZZ          bits 31:1 are not allowed.  In turn, incoming oldV and oldC
//ZZ          values (from the thunk) are assumed to have bits 31:1
//ZZ          clear. */
//ZZ       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
//ZZ       vassert(arity == 4);
//ZZ       cc_op   = args[0]; /* ARMG_CC_OP_* */
//ZZ       cc_dep1 = args[1];
//ZZ       cc_dep2 = args[2];
//ZZ       cc_ndep = args[3];
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
//ZZ          /* Thunk args are (result, shco, oldV) */
//ZZ          /* V after LOGIC --> oldV */
//ZZ          return cc_ndep;
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SUB)) {
//ZZ          /* Thunk args are (argL, argR, unused) */
//ZZ          /* V after SUB
//ZZ             --> let res = argL - argR
//ZZ                 in ((argL ^ argR) & (argL ^ res)) >> 31
//ZZ             --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
//ZZ          */
//ZZ          IRExpr* argL = cc_dep1;
//ZZ          IRExpr* argR = cc_dep2;
//ZZ          return
//ZZ             binop(Iop_Shr32,
//ZZ                   binop(Iop_And32,
//ZZ                         binop(Iop_Xor32, argL, argR),
//ZZ                         binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
//ZZ                   ),
//ZZ                   mkU8(31)
//ZZ             );
//ZZ       }
//ZZ
//ZZ       if (isU32(cc_op, ARMG_CC_OP_SBB)) {
//ZZ          /* This happens occasionally in softfloat code, eg __divdf3+140 */
//ZZ          /* thunk is:  (dep1=argL, dep2=argR, ndep=oldC) */
//ZZ          /* V after SBB
//ZZ             --> let res = argL - argR - (oldC ^ 1)
//ZZ                 in  (argL ^ argR) & (argL ^ res) & 1
//ZZ          */
//ZZ          return
//ZZ             binop(
//ZZ                Iop_And32,
//ZZ                binop(
//ZZ                   Iop_And32,
//ZZ                   // argL ^ argR
//ZZ                   binop(Iop_Xor32, cc_dep1, cc_dep2),
//ZZ                   // argL ^ (argL - argR - (oldC ^ 1))
//ZZ                   binop(Iop_Xor32,
//ZZ                         cc_dep1,
//ZZ                         binop(Iop_Sub32,
//ZZ                               binop(Iop_Sub32, cc_dep1, cc_dep2),
//ZZ                               binop(Iop_Xor32, cc_ndep, mkU32(1)))
//ZZ                   )
//ZZ                ),
//ZZ                mkU32(1)
//ZZ             );
//ZZ       }
//ZZ
//ZZ    }

#  undef unop
#  undef binop
#  undef mkU64
#  undef mkU8

   return NULL;
}
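
/* A minimal, self-contained sketch (not part of the build, plain C only):
   it checks the two SUB-case recovery rules quoted in the commented-out
   ARM specialiser above, namely that C after SUB is "argL >=u argR" and
   that V after SUB is bit 31 of ((argL ^ argR) & (argL ^ (argL - argR))).
   The helper names below are made up for illustration. */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t sub_flag_c ( uint32_t argL, uint32_t argR )
{
   /* C (no borrow) is set exactly when the unsigned subtraction
      does not wrap, i.e. argL >=u argR. */
   return argL >= argR ? 1 : 0;
}

static uint32_t sub_flag_v ( uint32_t argL, uint32_t argR )
{
   /* Signed overflow: argL and argR have different signs and the
      result's sign differs from argL's. */
   uint32_t res = argL - argR;
   return ((argL ^ argR) & (argL ^ res)) >> 31;
}

static void sub_flag_demo ( void )
{
   assert(sub_flag_c(5, 3) == 1);            /* 5 - 3: no borrow       */
   assert(sub_flag_c(3, 5) == 0);            /* 3 - 5: borrows         */
   assert(sub_flag_v(0x80000000u, 1) == 1);  /* INT_MIN - 1 overflows  */
   assert(sub_flag_v(5, 3) == 0);            /* small values: no ovflw */
}
#endif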


/*----------------------------------------------*/
/*--- The exported fns .. ---*/
/*----------------------------------------------*/

//ZZ /* VISIBLE TO LIBVEX CLIENT */
//ZZ #if 0
//ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
//ZZ                                  /*OUT*/VexGuestARMState* vex_state )
//ZZ {
//ZZ    vassert(0); // FIXME
//ZZ
//ZZ    /* Mask out everything except N Z V C. */
//ZZ    flags_native
//ZZ       &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
//ZZ
//ZZ    vex_state->guest_CC_OP   = ARMG_CC_OP_COPY;
//ZZ    vex_state->guest_CC_DEP1 = flags_native;
//ZZ    vex_state->guest_CC_DEP2 = 0;
//ZZ    vex_state->guest_CC_NDEP = 0;
//ZZ }
//ZZ #endif

/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
{
   ULong nzcv = 0;
   // NZCV
   nzcv |= arm64g_calculate_flags_nzcv(
              vex_state->guest_CC_OP,
              vex_state->guest_CC_DEP1,
              vex_state->guest_CC_DEP2,
              vex_state->guest_CC_NDEP
           );
   vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
//ZZ    // Q
//ZZ    if (vex_state->guest_QFLAG32 > 0)
//ZZ       cpsr |= (1 << 27);
//ZZ    // GE
//ZZ    if (vex_state->guest_GEFLAG0 > 0)
//ZZ       cpsr |= (1 << 16);
//ZZ    if (vex_state->guest_GEFLAG1 > 0)
//ZZ       cpsr |= (1 << 17);
//ZZ    if (vex_state->guest_GEFLAG2 > 0)
//ZZ       cpsr |= (1 << 18);
//ZZ    if (vex_state->guest_GEFLAG3 > 0)
//ZZ       cpsr |= (1 << 19);
//ZZ    // M
//ZZ    cpsr |= (1 << 4); // 0b10000 means user-mode
//ZZ    // J,T   J (bit 24) is zero by initialisation above
//ZZ    // T  we copy from R15T[0]
//ZZ    if (vex_state->guest_R15T & 1)
//ZZ       cpsr |= (1 << 5);
//ZZ    // ITSTATE we punt on for the time being.  Could compute it
//ZZ    // if needed though.
//ZZ    // E, endianness, 0 (littleendian) from initialisation above
//ZZ    // A,I,F disable some async exceptions.  Not sure about these.
//ZZ    // Leave as zero for the time being.
   return nzcv;
}
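
/* Illustrative sketch (not part of the build): how a LibVEX client might
   split the value returned by LibVEX_GuestARM64_get_nzcv into individual
   flags.  The bit positions assumed here are the ARMv8 PSTATE layout --
   N=31, Z=30, C=29, V=28 -- which is also the only set of bits the
   assertion above allows to be nonzero.  show_nzcv is a made-up name. */
#if 0
static void show_nzcv ( const VexGuestARM64State* st )
{
   ULong nzcv = LibVEX_GuestARM64_get_nzcv(st);
   vex_printf("N=%llu Z=%llu C=%llu V=%llu\n",
              (nzcv >> 31) & 1, (nzcv >> 30) & 1,
              (nzcv >> 29) & 1, (nzcv >> 28) & 1);
}
#endif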

/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestARM64_get_fpsr ( const VexGuestARM64State* vex_state )
{
   UInt w32 = vex_state->guest_QCFLAG[0] | vex_state->guest_QCFLAG[1]
              | vex_state->guest_QCFLAG[2] | vex_state->guest_QCFLAG[3];
   ULong fpsr = 0;
   // QC
   if (w32 != 0)
      fpsr |= (1 << 27);
   return fpsr;
}

void LibVEX_GuestARM64_set_fpsr ( /*MOD*/VexGuestARM64State* vex_state,
                                  ULong fpsr )
{
   // QC
   vex_state->guest_QCFLAG[0] = (UInt)((fpsr >> 27) & 1);
   vex_state->guest_QCFLAG[1] = 0;
   vex_state->guest_QCFLAG[2] = 0;
   vex_state->guest_QCFLAG[3] = 0;
}
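
/* Illustrative sketch (not part of the build): the two FPSR accessors
   above only model the cumulative saturation (QC) bit, FPSR[27].  So a
   set/get round trip should preserve exactly bit 27 and nothing else.
   fpsr_qc_roundtrip_demo is a made-up name. */
#if 0
static void fpsr_qc_roundtrip_demo ( void )
{
   VexGuestARM64State st;
   LibVEX_GuestARM64_initialise(&st);

   LibVEX_GuestARM64_set_fpsr(&st, 1ULL << 27);        /* set QC   */
   vassert(LibVEX_GuestARM64_get_fpsr(&st) == (1ULL << 27));

   LibVEX_GuestARM64_set_fpsr(&st, 0);                 /* clear QC */
   vassert(LibVEX_GuestARM64_get_fpsr(&st) == 0);
}
#endif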

/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
{
   vex_bzero(vex_state, sizeof(*vex_state));
//ZZ    vex_state->host_EvC_FAILADDR = 0;
//ZZ    vex_state->host_EvC_COUNTER = 0;
//ZZ
//ZZ    vex_state->guest_R0  = 0;
//ZZ    vex_state->guest_R1  = 0;
//ZZ    vex_state->guest_R2  = 0;
//ZZ    vex_state->guest_R3  = 0;
//ZZ    vex_state->guest_R4  = 0;
//ZZ    vex_state->guest_R5  = 0;
//ZZ    vex_state->guest_R6  = 0;
//ZZ    vex_state->guest_R7  = 0;
//ZZ    vex_state->guest_R8  = 0;
//ZZ    vex_state->guest_R9  = 0;
//ZZ    vex_state->guest_R10 = 0;
//ZZ    vex_state->guest_R11 = 0;
//ZZ    vex_state->guest_R12 = 0;
//ZZ    vex_state->guest_R13 = 0;
//ZZ    vex_state->guest_R14 = 0;
//ZZ    vex_state->guest_R15T = 0;  /* NB: implies ARM mode */
//ZZ
   vex_state->guest_CC_OP = ARM64G_CC_OP_COPY;
//ZZ    vex_state->guest_CC_DEP1 = 0;
//ZZ    vex_state->guest_CC_DEP2 = 0;
//ZZ    vex_state->guest_CC_NDEP = 0;
//ZZ    vex_state->guest_QFLAG32 = 0;
//ZZ    vex_state->guest_GEFLAG0 = 0;
//ZZ    vex_state->guest_GEFLAG1 = 0;
//ZZ    vex_state->guest_GEFLAG2 = 0;
//ZZ    vex_state->guest_GEFLAG3 = 0;
//ZZ
//ZZ    vex_state->guest_EMNOTE  = EmNote_NONE;
//ZZ    vex_state->guest_CMSTART = 0;
//ZZ    vex_state->guest_CMLEN   = 0;
//ZZ    vex_state->guest_NRADDR  = 0;
//ZZ    vex_state->guest_IP_AT_SYSCALL = 0;
//ZZ
//ZZ    vex_state->guest_D0  = 0;
//ZZ    vex_state->guest_D1  = 0;
//ZZ    vex_state->guest_D2  = 0;
//ZZ    vex_state->guest_D3  = 0;
//ZZ    vex_state->guest_D4  = 0;
//ZZ    vex_state->guest_D5  = 0;
//ZZ    vex_state->guest_D6  = 0;
//ZZ    vex_state->guest_D7  = 0;
//ZZ    vex_state->guest_D8  = 0;
//ZZ    vex_state->guest_D9  = 0;
//ZZ    vex_state->guest_D10 = 0;
//ZZ    vex_state->guest_D11 = 0;
//ZZ    vex_state->guest_D12 = 0;
//ZZ    vex_state->guest_D13 = 0;
//ZZ    vex_state->guest_D14 = 0;
//ZZ    vex_state->guest_D15 = 0;
//ZZ    vex_state->guest_D16 = 0;
//ZZ    vex_state->guest_D17 = 0;
//ZZ    vex_state->guest_D18 = 0;
//ZZ    vex_state->guest_D19 = 0;
//ZZ    vex_state->guest_D20 = 0;
//ZZ    vex_state->guest_D21 = 0;
//ZZ    vex_state->guest_D22 = 0;
//ZZ    vex_state->guest_D23 = 0;
//ZZ    vex_state->guest_D24 = 0;
//ZZ    vex_state->guest_D25 = 0;
//ZZ    vex_state->guest_D26 = 0;
//ZZ    vex_state->guest_D27 = 0;
//ZZ    vex_state->guest_D28 = 0;
//ZZ    vex_state->guest_D29 = 0;
//ZZ    vex_state->guest_D30 = 0;
//ZZ    vex_state->guest_D31 = 0;
//ZZ
//ZZ    /* ARM encoded; zero is the default as it happens (result flags
//ZZ       (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
//ZZ       all exns masked, all exn sticky bits cleared). */
//ZZ    vex_state->guest_FPSCR = 0;
//ZZ
//ZZ    vex_state->guest_TPIDRURO = 0;
//ZZ
//ZZ    /* Not in a Thumb IT block. */
//ZZ    vex_state->guest_ITSTATE = 0;
//ZZ
//ZZ    vex_state->padding1 = 0;
//ZZ    vex_state->padding2 = 0;
//ZZ    vex_state->padding3 = 0;
//ZZ    vex_state->padding4 = 0;
//ZZ    vex_state->padding5 = 0;
}
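
/* Illustrative sketch (not part of the build): the minimal sequence a
   LibVEX client might use to set up a fresh guest state -- zero it with
   the initialiser above, then plant at least a program counter and a
   stack pointer.  The addresses below are made-up example values. */
#if 0
static void setup_guest_state_demo ( void )
{
   VexGuestARM64State st;
   LibVEX_GuestARM64_initialise(&st);   /* all zero, CC_OP = ARM64G_CC_OP_COPY */
   st.guest_PC  = 0x400000ULL;          /* hypothetical entry point */
   st.guest_XSP = 0x7FFF0000ULL;        /* hypothetical stack top   */
}
#endif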


/*-----------------------------------------------------------*/
/*--- Describing the arm guest state, for the benefit ---*/
/*--- of iropt and instrumenters. ---*/
/*-----------------------------------------------------------*/

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
   That might be overkill (for 29 and 30); I don't know.
*/
Bool guest_arm64_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
   Int xsp_max = xsp_min + 8 - 1;
   Int pc_min  = offsetof(VexGuestARM64State, guest_PC);
   Int pc_max  = pc_min + 8 - 1;

   if (maxoff < xsp_min || minoff > xsp_max) {
      /* no overlap with xsp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < pc_min || minoff > pc_max) {
      /* no overlap with pc */
   } else {
      return True;
   }

   /* Guessing that we need PX for FP, but I don't really know. */
   Int x29_min = offsetof(VexGuestARM64State, guest_X29);
   Int x29_max = x29_min + 8 - 1;

   if (maxoff < x29_min || minoff > x29_max) {
      /* no overlap with x29 */
   } else {
      return True;
   }

   /* Guessing that we need PX for LR, but I don't really know. */
   Int x30_min = offsetof(VexGuestARM64State, guest_X30);
   Int x30_max = x30_min + 8 - 1;

   if (maxoff < x30_min || minoff > x30_max) {
      /* no overlap with x30 */
   } else {
      return True;
   }

   return False;
}
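
/* Illustrative sketch (not part of the build): the function above is
   queried with inclusive byte ranges of the guest state.  A range that
   touches guest_XSP must report True, whereas a range covering only a
   scratch register such as guest_X5 overlaps none of SP/PC/X29/X30 and
   so reports False.  guest_X5 is just an arbitrary example field and
   precise_exns_demo is a made-up name. */
#if 0
static void precise_exns_demo ( void )
{
   Int sp_lo = offsetof(VexGuestARM64State, guest_XSP);
   Int x5_lo = offsetof(VexGuestARM64State, guest_X5);

   /* Touching the stack pointer always needs precise exceptions. */
   vassert( guest_arm64_state_requires_precise_mem_exns(
               sp_lo, sp_lo + 7, VexRegUpdSpAtMemAccess) );

   /* A plain integer register does not. */
   vassert( !guest_arm64_state_requires_precise_mem_exns(
               x5_lo, x5_lo + 7, VexRegUpdSpAtMemAccess) );
}
#endif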


#define ALWAYSDEFD(field)                            \
    { offsetof(VexGuestARM64State, field),           \
      (sizeof ((VexGuestARM64State*)0)->field) }

VexGuestLayout
   arm64Guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestARM64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestARM64State,guest_XSP),
          .sizeof_SP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestARM64State,guest_PC),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 9,

          /* flags thunk: OP is always defd, whereas DEP1 and DEP2
             have to be tracked.  See detailed comment in gdefs.h on
             meaning of thunk fields. */
          .alwaysDefd
             = { /* 0 */ ALWAYSDEFD(guest_PC),
                 /* 1 */ ALWAYSDEFD(guest_CC_OP),
                 /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
                 /* 3 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 4 */ ALWAYSDEFD(guest_CMSTART),
                 /* 5 */ ALWAYSDEFD(guest_CMLEN),
                 /* 6 */ ALWAYSDEFD(guest_NRADDR),
                 /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
                 /* 8 */ ALWAYSDEFD(guest_TPIDR_EL0)
               }
        };
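
/* Illustrative sketch (not part of the build): ALWAYSDEFD(field) expands
   to an { offset-of-field, size-of-field } initialiser pair, so each
   alwaysDefd[] entry above names one guest-state slice that a tool such
   as Memcheck may treat as always defined.  Written out longhand for the
   guest_CC_OP entry, the pair is computed as below; the function name is
   made up. */
#if 0
static void alwaysdefd_expansion_demo ( void )
{
   Int cc_op_offset = offsetof(VexGuestARM64State, guest_CC_OP);
   Int cc_op_size   = sizeof(((VexGuestARM64State*)0)->guest_CC_OP);
   vex_printf("guest_CC_OP slice: offset %d, size %d bytes\n",
              cc_op_offset, cc_op_size);
}
#endif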


/*---------------------------------------------------------------*/
/*--- end guest_arm64_helpers.c ---*/
/*---------------------------------------------------------------*/
