1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_amd64_defs.h"
46 #include "guest_generic_x87.h"
47
48
49 /* This file contains helper functions for amd64 guest code.
50 Calls to these functions are generated by the back end.
51 These calls are of course in the host machine code and
52 this file will be compiled to host machine code, so that
53 all makes sense.
54
55 Only change the signatures of these helper functions very
56 carefully. If you change the signature here, you'll have to change
57 the parameters passed to them in the IR calls constructed by
58 guest-amd64/toIR.c.
59
60 The convention used is that all functions called from generated
61 code are named amd64g_<something>, and any function whose name lacks
62 that prefix is not called from generated code. Note that some
63 LibVEX_* functions can however be called by VEX's client, but that
64 is not the same as calling them from VEX-generated code.
65 */
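/* For orientation: toIR.c constructs calls to these helpers roughly
   along the following lines (a sketch only, not copied from toIR.c;
   the real call sites add further refinements):

      IRExpr** args = mkIRExprVec_5( mkU64(cond), cc_op_expr,
                                     cc_dep1_expr, cc_dep2_expr,
                                     cc_ndep_expr );
      IRExpr* call  = mkIRExprCCall( Ity_I64, 0, // regparms
                                     "amd64g_calculate_condition",
                                     &amd64g_calculate_condition,
                                     args );

   The cc_*_expr names above are placeholders.  Hence the warning: the
   arity and argument order here and in toIR.c must be kept in sync by
   hand. */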
66
67
68 /* Set to 1 to get detailed profiling info about use of the flag
69 machinery. */
70 #define PROFILE_RFLAGS 0
71
72
73 /*---------------------------------------------------------------*/
74 /*--- %rflags run-time helpers. ---*/
75 /*---------------------------------------------------------------*/
76
77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78 after imulq/mulq. */
79
80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81 {
82 const Long halfMask = 0xFFFFFFFFLL;
83 ULong u0, v0, w0;
84 Long u1, v1, w1, w2, t;
85 u0 = u & halfMask;
86 u1 = u >> 32;
87 v0 = v & halfMask;
88 v1 = v >> 32;
89 w0 = u0 * v0;
90 t = u1 * v0 + (w0 >> 32);
91 w1 = t & halfMask;
92 w2 = t >> 32;
93 w1 = u0 * v1 + w1;
94 *rHi = u1 * v1 + w2 + (w1 >> 32);
95 *rLo = (Long)((ULong)u * (ULong)v);
96 }
97
98 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
99 {
100 const ULong halfMask = 0xFFFFFFFFULL;
101 ULong u0, v0, w0;
102 ULong u1, v1, w1, w2, t;
103 u0 = u & halfMask;
104 u1 = u >> 32;
105 v0 = v & halfMask;
106 v1 = v >> 32;
107 w0 = u0 * v0;
108 t = u1 * v0 + (w0 >> 32);
109 w1 = t & halfMask;
110 w2 = t >> 32;
111 w1 = u0 * v1 + w1;
112 *rHi = u1 * v1 + w2 + (w1 >> 32);
113 *rLo = u * v;
114 }
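/* Sketch of the scheme used by mullS64/mullU64: writing
   u = u1*2^32 + u0 and v = v1*2^32 + v0, the full product is

      u*v = u1*v1*2^64 + (u1*v0 + u0*v1)*2^32 + u0*v0

   so the high 64 bits are u1*v1 plus the carries out of the cross
   terms and of the low product, which is what the t/w0/w1/w2
   bookkeeping accumulates.  The low 64 bits are simply the wrapped
   64-bit product u*v. */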
115
116
117 static const UChar parity_table[256] = {
118 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
119 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
123 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
124 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
125 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
126 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
127 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
131 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
135 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
139 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
140 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
141 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
142 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
143 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
147 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
148 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
149 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
150 };
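/* Note: amd64 PF is set iff the least significant byte of the result
   contains an even number of 1 bits, which is exactly what the table
   encodes.  For example (illustrative values only), 0x03 has two 1
   bits, so parity_table[0x03] == AMD64G_CC_MASK_P, whereas 0x07 has
   three 1 bits, so parity_table[0x07] == 0. */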
151
152 /* generalised left-shifter */
153 static inline Long lshift ( Long x, Int n )
154 {
155 if (n >= 0)
156 return (ULong)x << n;
157 else
158 return x >> (-n);
159 }
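/* For example, lshift(x, 4) == x << 4 and lshift(x, -4) == x >> 4;
   because x is a signed Long the latter is (on the compilers VEX is
   built with) an arithmetic shift, although every caller below masks
   the result so that only the single selected bit matters. */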
160
161 /* identity on ULong */
162 static inline ULong idULong ( ULong x )
163 {
164 return x;
165 }
166
167
168 #define PREAMBLE(__data_bits) \
169 /* const */ ULong DATA_MASK \
170 = __data_bits==8 \
171 ? 0xFFULL \
172 : (__data_bits==16 \
173 ? 0xFFFFULL \
174 : (__data_bits==32 \
175 ? 0xFFFFFFFFULL \
176 : 0xFFFFFFFFFFFFFFFFULL)); \
177 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
178 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
179 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
180 /* const */ ULong CC_NDEP = cc_ndep_formal; \
181 /* Four bogus assignments, which hopefully gcc can */ \
182 /* optimise away, and which stop it complaining about */ \
183 /* unused variables. */ \
184 SIGN_MASK = SIGN_MASK; \
185 DATA_MASK = DATA_MASK; \
186 CC_DEP2 = CC_DEP2; \
187 CC_NDEP = CC_NDEP;
188
189
190 /*-------------------------------------------------------------*/
191
192 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
193 { \
194 PREAMBLE(DATA_BITS); \
195 { ULong cf, pf, af, zf, sf, of; \
196 ULong argL, argR, res; \
197 argL = CC_DEP1; \
198 argR = CC_DEP2; \
199 res = argL + argR; \
200 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
201 pf = parity_table[(UChar)res]; \
202 af = (res ^ argL ^ argR) & 0x10; \
203 zf = ((DATA_UTYPE)res == 0) << 6; \
204 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
205 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
206 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
207 return cf | pf | af | zf | sf | of; \
208 } \
209 }
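/* Worked example for the 8-bit case of ACTIONS_ADD (values chosen
   purely for illustration): argL = 0xFF, argR = 0x01 gives
   res = 0x100, hence cf = 1 (the truncated result 0x00 is below
   argL), zf = 0x40 (the low 8 bits are zero), af = 0x10, sf = 0 and
   of = 0, as expected for -1 + 1 in signed 8-bit arithmetic.  The OF
   expression is the usual "same operand signs, different result
   sign" test, parked at bit 11 by the lshift. */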
210
211 /*-------------------------------------------------------------*/
212
213 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
214 { \
215 PREAMBLE(DATA_BITS); \
216 { ULong cf, pf, af, zf, sf, of; \
217 ULong argL, argR, res; \
218 argL = CC_DEP1; \
219 argR = CC_DEP2; \
220 res = argL - argR; \
221 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
222 pf = parity_table[(UChar)res]; \
223 af = (res ^ argL ^ argR) & 0x10; \
224 zf = ((DATA_UTYPE)res == 0) << 6; \
225 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
226 of = lshift((argL ^ argR) & (argL ^ res), \
227 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
228 return cf | pf | af | zf | sf | of; \
229 } \
230 }
231
232 /*-------------------------------------------------------------*/
233
234 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
235 { \
236 PREAMBLE(DATA_BITS); \
237 { ULong cf, pf, af, zf, sf, of; \
238 ULong argL, argR, oldC, res; \
239 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
240 argL = CC_DEP1; \
241 argR = CC_DEP2 ^ oldC; \
242 res = (argL + argR) + oldC; \
243 if (oldC) \
244 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
245 else \
246 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
247 pf = parity_table[(UChar)res]; \
248 af = (res ^ argL ^ argR) & 0x10; \
249 zf = ((DATA_UTYPE)res == 0) << 6; \
250 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
251 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
252 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
253 return cf | pf | af | zf | sf | of; \
254 } \
255 }
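/* Note on "argR = CC_DEP2 ^ oldC" above: for ADC (and SBB below) the
   thunk's DEP2 field holds the real right-hand operand XORed with the
   old carry bit, so XORing with oldC here recovers it -- this follows
   from the arithmetic; the field layout is described authoritatively
   in guest_amd64_defs.h.  The encoding is presumably chosen so the
   thunk fields remain simple data values rather than depending on the
   old flags in a hidden way. */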
256
257 /*-------------------------------------------------------------*/
258
259 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
260 { \
261 PREAMBLE(DATA_BITS); \
262 { ULong cf, pf, af, zf, sf, of; \
263 ULong argL, argR, oldC, res; \
264 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
265 argL = CC_DEP1; \
266 argR = CC_DEP2 ^ oldC; \
267 res = (argL - argR) - oldC; \
268 if (oldC) \
269 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
270 else \
271 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
272 pf = parity_table[(UChar)res]; \
273 af = (res ^ argL ^ argR) & 0x10; \
274 zf = ((DATA_UTYPE)res == 0) << 6; \
275 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
276 of = lshift((argL ^ argR) & (argL ^ res), \
277 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
278 return cf | pf | af | zf | sf | of; \
279 } \
280 }
281
282 /*-------------------------------------------------------------*/
283
284 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
285 { \
286 PREAMBLE(DATA_BITS); \
287 { ULong cf, pf, af, zf, sf, of; \
288 cf = 0; \
289 pf = parity_table[(UChar)CC_DEP1]; \
290 af = 0; \
291 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
292 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
293 of = 0; \
294 return cf | pf | af | zf | sf | of; \
295 } \
296 }
297
298 /*-------------------------------------------------------------*/
299
300 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
301 { \
302 PREAMBLE(DATA_BITS); \
303 { ULong cf, pf, af, zf, sf, of; \
304 ULong argL, argR, res; \
305 res = CC_DEP1; \
306 argL = res - 1; \
307 argR = 1; \
308 cf = CC_NDEP & AMD64G_CC_MASK_C; \
309 pf = parity_table[(UChar)res]; \
310 af = (res ^ argL ^ argR) & 0x10; \
311 zf = ((DATA_UTYPE)res == 0) << 6; \
312 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
313 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
314 return cf | pf | af | zf | sf | of; \
315 } \
316 }
317
318 /*-------------------------------------------------------------*/
319
320 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
321 { \
322 PREAMBLE(DATA_BITS); \
323 { ULong cf, pf, af, zf, sf, of; \
324 ULong argL, argR, res; \
325 res = CC_DEP1; \
326 argL = res + 1; \
327 argR = 1; \
328 cf = CC_NDEP & AMD64G_CC_MASK_C; \
329 pf = parity_table[(UChar)res]; \
330 af = (res ^ argL ^ argR) & 0x10; \
331 zf = ((DATA_UTYPE)res == 0) << 6; \
332 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
333 of = ((res & DATA_MASK) \
334 == ((ULong)SIGN_MASK - 1)) << 11; \
335 return cf | pf | af | zf | sf | of; \
336 } \
337 }
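/* INC and DEC leave the carry flag unchanged, which is why the two
   macros above take CF from CC_NDEP (the old carry, threaded through
   by toIR.c) instead of recomputing it.  OF can be computed from the
   result alone: INC overflows exactly when the result is the most
   negative value of the width (0x80...0), and DEC exactly when it is
   the most positive (0x7F...F). */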
338
339 /*-------------------------------------------------------------*/
340
341 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
342 { \
343 PREAMBLE(DATA_BITS); \
344 { ULong cf, pf, af, zf, sf, of; \
345 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
346 pf = parity_table[(UChar)CC_DEP1]; \
347 af = 0; /* undefined */ \
348 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
349 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
350 /* of is defined if shift count == 1 */ \
351 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
352 & AMD64G_CC_MASK_O; \
353 return cf | pf | af | zf | sf | of; \
354 } \
355 }
356
357 /*-------------------------------------------------------------*/
358
359 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
360 { \
361 PREAMBLE(DATA_BITS); \
362 { ULong cf, pf, af, zf, sf, of; \
363 cf = CC_DEP2 & 1; \
364 pf = parity_table[(UChar)CC_DEP1]; \
365 af = 0; /* undefined */ \
366 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
367 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
368 /* of is defined if shift count == 1 */ \
369 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
370 & AMD64G_CC_MASK_O; \
371 return cf | pf | af | zf | sf | of; \
372 } \
373 }
374
375 /*-------------------------------------------------------------*/
376
377 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
378 /* DEP1 = result, NDEP = old flags */
379 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
380 { \
381 PREAMBLE(DATA_BITS); \
382 { ULong fl \
383 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
384 | (AMD64G_CC_MASK_C & CC_DEP1) \
385 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
386 11-(DATA_BITS-1)) \
387 ^ lshift(CC_DEP1, 11))); \
388 return fl; \
389 } \
390 }
391
392 /*-------------------------------------------------------------*/
393
394 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
395 /* DEP1 = result, NDEP = old flags */
396 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
397 { \
398 PREAMBLE(DATA_BITS); \
399 { ULong fl \
400 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
401 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
402 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
403 11-(DATA_BITS-1)) \
404 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
405 return fl; \
406 } \
407 }
408
409 /*-------------------------------------------------------------*/
410
411 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
412 DATA_U2TYPE, NARROWto2U) \
413 { \
414 PREAMBLE(DATA_BITS); \
415 { ULong cf, pf, af, zf, sf, of; \
416 DATA_UTYPE hi; \
417 DATA_UTYPE lo \
418 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
419 * ((DATA_UTYPE)CC_DEP2) ); \
420 DATA_U2TYPE rr \
421 = NARROWto2U( \
422 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
423 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
424 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
425 cf = (hi != 0); \
426 pf = parity_table[(UChar)lo]; \
427 af = 0; /* undefined */ \
428 zf = (lo == 0) << 6; \
429 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
430 of = cf << 11; \
431 return cf | pf | af | zf | sf | of; \
432 } \
433 }
434
435 /*-------------------------------------------------------------*/
436
437 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
438 DATA_S2TYPE, NARROWto2S) \
439 { \
440 PREAMBLE(DATA_BITS); \
441 { ULong cf, pf, af, zf, sf, of; \
442 DATA_STYPE hi; \
443 DATA_STYPE lo \
444 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
445 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
446 DATA_S2TYPE rr \
447 = NARROWto2S( \
448 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
449 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
450 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
451 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
452 pf = parity_table[(UChar)lo]; \
453 af = 0; /* undefined */ \
454 zf = (lo == 0) << 6; \
455 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
456 of = cf << 11; \
457 return cf | pf | af | zf | sf | of; \
458 } \
459 }
460
461 /*-------------------------------------------------------------*/
462
463 #define ACTIONS_UMULQ \
464 { \
465 PREAMBLE(64); \
466 { ULong cf, pf, af, zf, sf, of; \
467 ULong lo, hi; \
468 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
469 cf = (hi != 0); \
470 pf = parity_table[(UChar)lo]; \
471 af = 0; /* undefined */ \
472 zf = (lo == 0) << 6; \
473 sf = lshift(lo, 8 - 64) & 0x80; \
474 of = cf << 11; \
475 return cf | pf | af | zf | sf | of; \
476 } \
477 }
478
479 /*-------------------------------------------------------------*/
480
481 #define ACTIONS_SMULQ \
482 { \
483 PREAMBLE(64); \
484 { ULong cf, pf, af, zf, sf, of; \
485 Long lo, hi; \
486 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
487 cf = (hi != (lo >>/*s*/ (64-1))); \
488 pf = parity_table[(UChar)lo]; \
489 af = 0; /* undefined */ \
490 zf = (lo == 0) << 6; \
491 sf = lshift(lo, 8 - 64) & 0x80; \
492 of = cf << 11; \
493 return cf | pf | af | zf | sf | of; \
494 } \
495 }
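/* For all of the widening-multiply cases above, CF and OF are set iff
   the full double-width product does not fit in the destination
   width: the unsigned variants require the high half to be zero, and
   the signed variants require the high half to equal the sign
   extension of the low half (hi == lo >>s (N-1)). */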
496
497 /*-------------------------------------------------------------*/
498
499 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
500 { \
501 PREAMBLE(DATA_BITS); \
502 { ULong cf, pf, af, zf, sf, of; \
503 cf = 0; \
504 pf = 0; \
505 af = 0; \
506 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
507 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
508 of = 0; \
509 return cf | pf | af | zf | sf | of; \
510 } \
511 }
512
513 /*-------------------------------------------------------------*/
514
515 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
516 { \
517 PREAMBLE(DATA_BITS); \
518 { ULong cf, pf, af, zf, sf, of; \
519 cf = ((DATA_UTYPE)CC_DEP2 != 0); \
520 pf = 0; \
521 af = 0; \
522 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
523 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
524 of = 0; \
525 return cf | pf | af | zf | sf | of; \
526 } \
527 }
528
529 /*-------------------------------------------------------------*/
530
531 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
532 { \
533 PREAMBLE(DATA_BITS); \
534 { ULong cf, pf, af, zf, sf, of; \
535 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
536 pf = 0; \
537 af = 0; \
538 zf = 0; \
539 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
540 of = 0; \
541 return cf | pf | af | zf | sf | of; \
542 } \
543 }
544
545 /*-------------------------------------------------------------*/
546
547 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
548 { \
549 PREAMBLE(DATA_BITS); \
550 { ULong cf, pf, af, zf, sf, of; \
551 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
552 pf = 0; \
553 af = 0; \
554 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
555 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
556 of = 0; \
557 return cf | pf | af | zf | sf | of; \
558 } \
559 }
560
561 /*-------------------------------------------------------------*/
562
563 #define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME) \
564 { \
565 PREAMBLE(DATA_BITS); \
566 { ULong ocf; /* o or c */ \
567 ULong argL, argR, oldOC, res; \
568 oldOC = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1; \
569 argL = CC_DEP1; \
570 argR = CC_DEP2 ^ oldOC; \
571 res = (argL + argR) + oldOC; \
572 if (oldOC) \
573 ocf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
574 else \
575 ocf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
576 return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME) \
577 | (ocf << AMD64G_CC_SHIFT_##FLAGNAME); \
578 } \
579 }
580
581 /*-------------------------------------------------------------*/
582
583
584 #if PROFILE_RFLAGS
585
586 static Bool initted = False;
587
588 /* C flag, fast route */
589 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
590 /* C flag, slow route */
591 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
592 /* table for calculate_cond */
593 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
594 /* total entry counts for calc_all, calc_c, calc_cond. */
595 static UInt n_calc_all = 0;
596 static UInt n_calc_c = 0;
597 static UInt n_calc_cond = 0;
598
599 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
600
601
602 static void showCounts ( void )
603 {
604 Int op, co;
605 HChar ch;
606 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
607 n_calc_all, n_calc_cond, n_calc_c);
608
609 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
610 " S NS P NP L NL LE NLE\n");
611 vex_printf(" -----------------------------------------------------"
612 "----------------------------------------\n");
613 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
614
615 ch = ' ';
616 if (op > 0 && (op-1) % 4 == 0)
617 ch = 'B';
618 if (op > 0 && (op-1) % 4 == 1)
619 ch = 'W';
620 if (op > 0 && (op-1) % 4 == 2)
621 ch = 'L';
622 if (op > 0 && (op-1) % 4 == 3)
623 ch = 'Q';
624
625 vex_printf("%2d%c: ", op, ch);
626 vex_printf("%6u ", tabc_slow[op]);
627 vex_printf("%6u ", tabc_fast[op]);
628 for (co = 0; co < 16; co++) {
629 Int n = tab_cond[op][co];
630 if (n >= 1000) {
631 vex_printf(" %3dK", n / 1000);
632 } else
633 if (n >= 0) {
634 vex_printf(" %3d ", n );
635 } else {
636 vex_printf(" ");
637 }
638 }
639 vex_printf("\n");
640 }
641 vex_printf("\n");
642 }
643
644 static void initCounts ( void )
645 {
646 Int op, co;
647 initted = True;
648 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
649 tabc_fast[op] = tabc_slow[op] = 0;
650 for (co = 0; co < 16; co++)
651 tab_cond[op][co] = 0;
652 }
653 }
654
655 #endif /* PROFILE_RFLAGS */
656
657
658 /* Calculate all 6 flags from the supplied thunk parameters.
659    This is the worker function; it is not called directly from
660    generated code -- the CLEAN HELPER wrappers below are. */
661 static
662 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
663 ULong cc_dep1_formal,
664 ULong cc_dep2_formal,
665 ULong cc_ndep_formal )
666 {
667 switch (cc_op) {
668 case AMD64G_CC_OP_COPY:
669 return cc_dep1_formal
670 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
671 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
672
673 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
674 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
675 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
676 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
677
678 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
679 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
680 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
681 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
682
683 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
684 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
685 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
686 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
687
688 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
689 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
690 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
691 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
692
693 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
694 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
695 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
696 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
697
698 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
699 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
700 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
701 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
702
703 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
704 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
705 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
706 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
707
708 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
709 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
710 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
711 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
712
713 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
714 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
715 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
716 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
717
718 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
719 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
720 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
721 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
722
723 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
724 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
725 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
726 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
727
728 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
729 UShort, toUShort );
730 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
731 UInt, toUInt );
732 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
733 ULong, idULong );
734
735 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
736
737 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
738 Short, toUShort );
739 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
740 Int, toUInt );
741 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
742 Long, idULong );
743
744 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
745
746 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
747 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
748
749 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
750 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
751
752 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
753 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
754
755 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
756 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
757
758 case AMD64G_CC_OP_ADCX32: ACTIONS_ADX( 32, UInt, C );
759 case AMD64G_CC_OP_ADCX64: ACTIONS_ADX( 64, ULong, C );
760
761 case AMD64G_CC_OP_ADOX32: ACTIONS_ADX( 32, UInt, O );
762 case AMD64G_CC_OP_ADOX64: ACTIONS_ADX( 64, ULong, O );
763
764 default:
765 /* shouldn't really make these calls from generated code */
766 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
767 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
768 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
769 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
770 }
771 }
772
773
774 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
775 /* Calculate all the 6 flags from the supplied thunk parameters. */
776 ULong amd64g_calculate_rflags_all ( ULong cc_op,
777 ULong cc_dep1,
778 ULong cc_dep2,
779 ULong cc_ndep )
780 {
781 # if PROFILE_RFLAGS
782 if (!initted) initCounts();
783 n_calc_all++;
784 if (SHOW_COUNTS_NOW) showCounts();
785 # endif
786 return
787 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
788 }
789
790
791 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
792 /* Calculate just the carry flag from the supplied thunk parameters. */
793 ULong amd64g_calculate_rflags_c ( ULong cc_op,
794 ULong cc_dep1,
795 ULong cc_dep2,
796 ULong cc_ndep )
797 {
798 # if PROFILE_RFLAGS
799 if (!initted) initCounts();
800 n_calc_c++;
801 tabc_fast[cc_op]++;
802 if (SHOW_COUNTS_NOW) showCounts();
803 # endif
804
805 /* Fast-case some common ones. */
806 switch (cc_op) {
807 case AMD64G_CC_OP_COPY:
808 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
809 case AMD64G_CC_OP_LOGICQ:
810 case AMD64G_CC_OP_LOGICL:
811 case AMD64G_CC_OP_LOGICW:
812 case AMD64G_CC_OP_LOGICB:
813 return 0;
814 // case AMD64G_CC_OP_SUBL:
815 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
816 // ? AMD64G_CC_MASK_C : 0;
817 // case AMD64G_CC_OP_SUBW:
818 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
819 // ? AMD64G_CC_MASK_C : 0;
820 // case AMD64G_CC_OP_SUBB:
821 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
822 // ? AMD64G_CC_MASK_C : 0;
823 // case AMD64G_CC_OP_INCL:
824 // case AMD64G_CC_OP_DECL:
825 // return cc_ndep & AMD64G_CC_MASK_C;
826 default:
827 break;
828 }
829
830 # if PROFILE_RFLAGS
831 tabc_fast[cc_op]--;
832 tabc_slow[cc_op]++;
833 # endif
834
835 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
836 & AMD64G_CC_MASK_C;
837 }
838
839
840 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
841 /* returns 1 or 0 */
842 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
843 ULong cc_op,
844 ULong cc_dep1,
845 ULong cc_dep2,
846 ULong cc_ndep )
847 {
848 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
849 cc_dep2, cc_ndep);
850 ULong of,sf,zf,cf,pf;
851 ULong inv = cond & 1;
852
853 # if PROFILE_RFLAGS
854 if (!initted) initCounts();
855 tab_cond[cc_op][cond]++;
856 n_calc_cond++;
857 if (SHOW_COUNTS_NOW) showCounts();
858 # endif
859
860 switch (cond) {
861 case AMD64CondNO:
862 case AMD64CondO: /* OF == 1 */
863 of = rflags >> AMD64G_CC_SHIFT_O;
864 return 1 & (inv ^ of);
865
866 case AMD64CondNZ:
867 case AMD64CondZ: /* ZF == 1 */
868 zf = rflags >> AMD64G_CC_SHIFT_Z;
869 return 1 & (inv ^ zf);
870
871 case AMD64CondNB:
872 case AMD64CondB: /* CF == 1 */
873 cf = rflags >> AMD64G_CC_SHIFT_C;
874 return 1 & (inv ^ cf);
875 break;
876
877 case AMD64CondNBE:
878 case AMD64CondBE: /* (CF or ZF) == 1 */
879 cf = rflags >> AMD64G_CC_SHIFT_C;
880 zf = rflags >> AMD64G_CC_SHIFT_Z;
881 return 1 & (inv ^ (cf | zf));
882 break;
883
884 case AMD64CondNS:
885 case AMD64CondS: /* SF == 1 */
886 sf = rflags >> AMD64G_CC_SHIFT_S;
887 return 1 & (inv ^ sf);
888
889 case AMD64CondNP:
890 case AMD64CondP: /* PF == 1 */
891 pf = rflags >> AMD64G_CC_SHIFT_P;
892 return 1 & (inv ^ pf);
893
894 case AMD64CondNL:
895 case AMD64CondL: /* (SF xor OF) == 1 */
896 sf = rflags >> AMD64G_CC_SHIFT_S;
897 of = rflags >> AMD64G_CC_SHIFT_O;
898 return 1 & (inv ^ (sf ^ of));
899 break;
900
901 case AMD64CondNLE:
902 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
903 sf = rflags >> AMD64G_CC_SHIFT_S;
904 of = rflags >> AMD64G_CC_SHIFT_O;
905 zf = rflags >> AMD64G_CC_SHIFT_Z;
906 return 1 & (inv ^ ((sf ^ of) | zf));
907 break;
908
909 default:
910 /* shouldn't really make these calls from generated code */
911 vex_printf("amd64g_calculate_condition"
912 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
913 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
914 vpanic("amd64g_calculate_condition");
915 }
916 }
917
918
919 /* VISIBLE TO LIBVEX CLIENT */
920 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
921 {
922 ULong rflags = amd64g_calculate_rflags_all_WRK(
923 vex_state->guest_CC_OP,
924 vex_state->guest_CC_DEP1,
925 vex_state->guest_CC_DEP2,
926 vex_state->guest_CC_NDEP
927 );
928 Long dflag = vex_state->guest_DFLAG;
929 vassert(dflag == 1 || dflag == -1);
930 if (dflag == -1)
931 rflags |= (1<<10);
932 if (vex_state->guest_IDFLAG == 1)
933 rflags |= (1<<21);
934 if (vex_state->guest_ACFLAG == 1)
935 rflags |= (1<<18);
936
937 return rflags;
938 }
939
940 /* VISIBLE TO LIBVEX CLIENT */
941 void
942 LibVEX_GuestAMD64_put_rflags ( ULong rflags,
943 /*MOD*/VexGuestAMD64State* vex_state )
944 {
945 /* D flag */
946 if (rflags & AMD64G_CC_MASK_D) {
947 vex_state->guest_DFLAG = -1;
948 rflags &= ~AMD64G_CC_MASK_D;
949 }
950 else
951 vex_state->guest_DFLAG = 1;
952
953 /* ID flag */
954 if (rflags & AMD64G_CC_MASK_ID) {
955 vex_state->guest_IDFLAG = 1;
956 rflags &= ~AMD64G_CC_MASK_ID;
957 }
958 else
959 vex_state->guest_IDFLAG = 0;
960
961 /* AC flag */
962 if (rflags & AMD64G_CC_MASK_AC) {
963 vex_state->guest_ACFLAG = 1;
964 rflags &= ~AMD64G_CC_MASK_AC;
965 }
966 else
967 vex_state->guest_ACFLAG = 0;
968
969 UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
970 AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
971 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
972 vex_state->guest_CC_DEP1 = rflags & cc_mask;
973 vex_state->guest_CC_DEP2 = 0;
974 vex_state->guest_CC_NDEP = 0;
975 }
976
977 /* VISIBLE TO LIBVEX CLIENT */
978 void
979 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
980 /*MOD*/VexGuestAMD64State* vex_state )
981 {
982 ULong oszacp = amd64g_calculate_rflags_all_WRK(
983 vex_state->guest_CC_OP,
984 vex_state->guest_CC_DEP1,
985 vex_state->guest_CC_DEP2,
986 vex_state->guest_CC_NDEP
987 );
988 if (new_carry_flag & 1) {
989 oszacp |= AMD64G_CC_MASK_C;
990 } else {
991 oszacp &= ~AMD64G_CC_MASK_C;
992 }
993 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
994 vex_state->guest_CC_DEP1 = oszacp;
995 vex_state->guest_CC_DEP2 = 0;
996 vex_state->guest_CC_NDEP = 0;
997 }
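/* Illustrative client-side usage of the functions above (a
   hypothetical sketch, not taken from any tool): to flip the guest's
   carry flag one might write

      VexGuestAMD64State* st = ...;   // obtained from the client
      ULong fl = LibVEX_GuestAMD64_get_rflags(st);
      LibVEX_GuestAMD64_put_rflag_c((fl & AMD64G_CC_MASK_C) ? 0 : 1, st);

   Note that the put_* functions collapse the thunk to CC_OP_COPY, so
   any lazily-encoded flag state is made concrete at that point. */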
998
999
1000 /*---------------------------------------------------------------*/
1001 /*--- %rflags translation-time function specialisers. ---*/
1002 /*--- These help iropt specialise calls to the above run-time   ---*/
1003 /*--- %rflags functions. ---*/
1004 /*---------------------------------------------------------------*/
1005
1006 /* Used by the optimiser to try specialisations. Returns an
1007 equivalent expression, or NULL if none. */
1008
1009 static Bool isU64 ( IRExpr* e, ULong n )
1010 {
1011 return toBool( e->tag == Iex_Const
1012 && e->Iex.Const.con->tag == Ico_U64
1013 && e->Iex.Const.con->Ico.U64 == n );
1014 }
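/* Example of the kind of rewrite performed below (a sketch): the
   thunk left behind by "cmpq %rsi,%rdi ; jz ..." reaches iropt as
      amd64g_calculate_condition(AMD64CondZ, AMD64G_CC_OP_SUBQ,
                                 rdi, rsi, ndep)
   and is replaced by the far cheaper
      1Uto64(CmpEQ64(rdi, rsi))
   which iropt can then simplify further. */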
1015
1016 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
1017 IRExpr** args,
1018 IRStmt** precedingStmts,
1019 Int n_precedingStmts )
1020 {
1021 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1022 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1023 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1024 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1025 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1026
1027 Int i, arity = 0;
1028 for (i = 0; args[i]; i++)
1029 arity++;
1030 # if 0
1031 vex_printf("spec request:\n");
1032 vex_printf(" %s ", function_name);
1033 for (i = 0; i < arity; i++) {
1034 vex_printf(" ");
1035 ppIRExpr(args[i]);
1036 }
1037 vex_printf("\n");
1038 # endif
1039
1040 /* --------- specialising "amd64g_calculate_condition" --------- */
1041
1042 if (vex_streq(function_name, "amd64g_calculate_condition")) {
1043 /* specialise calls to above "calculate condition" function */
1044 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
1045 vassert(arity == 5);
1046 cond = args[0];
1047 cc_op = args[1];
1048 cc_dep1 = args[2];
1049 cc_dep2 = args[3];
1050
1051 /*---------------- ADDQ ----------------*/
1052
1053 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
1054 /* long long add, then Z --> test (dst+src == 0) */
1055 return unop(Iop_1Uto64,
1056 binop(Iop_CmpEQ64,
1057 binop(Iop_Add64, cc_dep1, cc_dep2),
1058 mkU64(0)));
1059 }
1060
1061 /*---------------- ADDL ----------------*/
1062
1063 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
1064 /* This is very commonly generated by Javascript JITs, for
1065 the idiom "do a 32-bit add and jump to out-of-line code if
1066 an overflow occurs". */
1067 /* long add, then O (overflow)
1068 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1069 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1070 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1071 */
1072 vassert(isIRAtom(cc_dep1));
1073 vassert(isIRAtom(cc_dep2));
1074 return
1075 binop(Iop_And64,
1076 binop(Iop_Shr64,
1077 binop(Iop_And64,
1078 unop(Iop_Not64,
1079 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1080 binop(Iop_Xor64,
1081 cc_dep1,
1082 binop(Iop_Add64, cc_dep1, cc_dep2))),
1083 mkU8(31)),
1084 mkU64(1));
1085
1086 }
1087
1088 /*---------------- SUBQ ----------------*/
1089
1090 /* 0, */
1091 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1092 /* long long sub/cmp, then O (overflow)
1093 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1094 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1095 */
1096 vassert(isIRAtom(cc_dep1));
1097 vassert(isIRAtom(cc_dep2));
1098 return binop(Iop_Shr64,
1099 binop(Iop_And64,
1100 binop(Iop_Xor64, cc_dep1, cc_dep2),
1101 binop(Iop_Xor64,
1102 cc_dep1,
1103 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1104 mkU8(63));
1105 }
1106 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1107 /* No action. Never yet found a test case. */
1108 }
1109
1110 /* 2, 3 */
1111 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1112 /* long long sub/cmp, then B (unsigned less than)
1113 --> test dst <u src */
1114 return unop(Iop_1Uto64,
1115 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1116 }
1117 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1118 /* long long sub/cmp, then NB (unsigned greater than or equal)
1119 --> test src <=u dst */
1120 /* Note, args are opposite way round from the usual */
1121 return unop(Iop_1Uto64,
1122 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1123 }
1124
1125 /* 4, 5 */
1126 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1127 /* long long sub/cmp, then Z --> test dst==src */
1128 return unop(Iop_1Uto64,
1129 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1130 }
1131 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1132 /* long long sub/cmp, then NZ --> test dst!=src */
1133 return unop(Iop_1Uto64,
1134 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1135 }
1136
1137 /* 6, 7 */
1138 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1139 /* long long sub/cmp, then BE (unsigned less than or equal)
1140 --> test dst <=u src */
1141 return unop(Iop_1Uto64,
1142 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1143 }
1144 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1145 /* long long sub/cmp, then NBE (unsigned greater than)
1146 --> test !(dst <=u src) */
1147 return binop(Iop_Xor64,
1148 unop(Iop_1Uto64,
1149 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1150 mkU64(1));
1151 }
1152
1153 /* 8, 9 */
1154 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1155 /* long long sub/cmp, then S (negative)
1156 --> (dst-src)[63]
1157 --> (dst-src) >>u 63 */
1158 return binop(Iop_Shr64,
1159 binop(Iop_Sub64, cc_dep1, cc_dep2),
1160 mkU8(63));
1161 }
1162 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1163 /* long long sub/cmp, then NS (not negative)
1164 --> (dst-src)[63] ^ 1
1165 --> ((dst-src) >>u 63) ^ 1 */
1166 return binop(Iop_Xor64,
1167 binop(Iop_Shr64,
1168 binop(Iop_Sub64, cc_dep1, cc_dep2),
1169 mkU8(63)),
1170 mkU64(1));
1171 }
1172
1173 /* 12, 13 */
1174 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1175 /* long long sub/cmp, then L (signed less than)
1176 --> test dst <s src */
1177 return unop(Iop_1Uto64,
1178 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1179 }
1180 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1181 /* long long sub/cmp, then NL (signed greater than or equal)
1182 --> test dst >=s src
1183 --> test src <=s dst */
1184 return unop(Iop_1Uto64,
1185 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1186 }
1187
1188 /* 14, 15 */
1189 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1190 /* long long sub/cmp, then LE (signed less than or equal)
1191 --> test dst <=s src */
1192 return unop(Iop_1Uto64,
1193 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1194 }
1195 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1196 /* long long sub/cmp, then NLE (signed greater than)
1197 --> test !(dst <=s src)
1198 --> test (dst >s src)
1199 --> test (src <s dst) */
1200 return unop(Iop_1Uto64,
1201 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1202
1203 }
1204
1205 /*---------------- SUBL ----------------*/
1206
1207 /* 0, */
1208 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1209 /* This is very commonly generated by Javascript JITs, for
1210 the idiom "do a 32-bit subtract and jump to out-of-line
1211 code if an overflow occurs". */
1212 /* long sub/cmp, then O (overflow)
1213 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1214 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1215 */
1216 vassert(isIRAtom(cc_dep1));
1217 vassert(isIRAtom(cc_dep2));
1218 return
1219 binop(Iop_And64,
1220 binop(Iop_Shr64,
1221 binop(Iop_And64,
1222 binop(Iop_Xor64, cc_dep1, cc_dep2),
1223 binop(Iop_Xor64,
1224 cc_dep1,
1225 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1226 mkU8(31)),
1227 mkU64(1));
1228 }
1229 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1230 /* No action. Never yet found a test case. */
1231 }
1232
1233 /* 2, 3 */
1234 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1235 /* long sub/cmp, then B (unsigned less than)
1236 --> test dst <u src */
1237 return unop(Iop_1Uto64,
1238 binop(Iop_CmpLT32U,
1239 unop(Iop_64to32, cc_dep1),
1240 unop(Iop_64to32, cc_dep2)));
1241 }
1242 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1243 /* long sub/cmp, then NB (unsigned greater than or equal)
1244 --> test src <=u dst */
1245 /* Note, args are opposite way round from the usual */
1246 return unop(Iop_1Uto64,
1247 binop(Iop_CmpLE32U,
1248 unop(Iop_64to32, cc_dep2),
1249 unop(Iop_64to32, cc_dep1)));
1250 }
1251
1252 /* 4, 5 */
1253 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1254 /* long sub/cmp, then Z --> test dst==src */
1255 return unop(Iop_1Uto64,
1256 binop(Iop_CmpEQ32,
1257 unop(Iop_64to32, cc_dep1),
1258 unop(Iop_64to32, cc_dep2)));
1259 }
1260 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1261 /* long sub/cmp, then NZ --> test dst!=src */
1262 return unop(Iop_1Uto64,
1263 binop(Iop_CmpNE32,
1264 unop(Iop_64to32, cc_dep1),
1265 unop(Iop_64to32, cc_dep2)));
1266 }
1267
1268 /* 6, 7 */
1269 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1270 /* long sub/cmp, then BE (unsigned less than or equal)
1271 --> test dst <=u src */
1272 return unop(Iop_1Uto64,
1273 binop(Iop_CmpLE32U,
1274 unop(Iop_64to32, cc_dep1),
1275 unop(Iop_64to32, cc_dep2)));
1276 }
1277 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1278 /* long sub/cmp, then NBE (unsigned greater than)
1279 --> test src <u dst */
1280 /* Note, args are opposite way round from the usual */
1281 return unop(Iop_1Uto64,
1282 binop(Iop_CmpLT32U,
1283 unop(Iop_64to32, cc_dep2),
1284 unop(Iop_64to32, cc_dep1)));
1285 }
1286
1287 /* 8, 9 */
1288 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1289 /* long sub/cmp, then S (negative)
1290 --> (dst-src)[31]
1291 --> ((dst -64 src) >>u 31) & 1
1292 Pointless to narrow the args to 32 bit before the subtract. */
1293 return binop(Iop_And64,
1294 binop(Iop_Shr64,
1295 binop(Iop_Sub64, cc_dep1, cc_dep2),
1296 mkU8(31)),
1297 mkU64(1));
1298 }
1299 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1300 /* long sub/cmp, then NS (not negative)
1301 --> (dst-src)[31] ^ 1
1302 --> (((dst -64 src) >>u 31) & 1) ^ 1
1303 Pointless to narrow the args to 32 bit before the subtract. */
1304 return binop(Iop_Xor64,
1305 binop(Iop_And64,
1306 binop(Iop_Shr64,
1307 binop(Iop_Sub64, cc_dep1, cc_dep2),
1308 mkU8(31)),
1309 mkU64(1)),
1310 mkU64(1));
1311 }
1312
1313 /* 12, 13 */
1314 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1315 /* long sub/cmp, then L (signed less than)
1316 --> test dst <s src */
1317 return unop(Iop_1Uto64,
1318 binop(Iop_CmpLT32S,
1319 unop(Iop_64to32, cc_dep1),
1320 unop(Iop_64to32, cc_dep2)));
1321 }
1322 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1323 /* long sub/cmp, then NL (signed greater than or equal)
1324 --> test dst >=s src
1325 --> test src <=s dst */
1326 return unop(Iop_1Uto64,
1327 binop(Iop_CmpLE32S,
1328 unop(Iop_64to32, cc_dep2),
1329 unop(Iop_64to32, cc_dep1)));
1330 }
1331
1332 /* 14, 15 */
1333 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1334 /* long sub/cmp, then LE (signed less than or equal)
1335 --> test dst <=s src */
1336 return unop(Iop_1Uto64,
1337 binop(Iop_CmpLE32S,
1338 unop(Iop_64to32, cc_dep1),
1339 unop(Iop_64to32, cc_dep2)));
1340
1341 }
1342 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1343 /* long sub/cmp, then NLE (signed greater than)
1344 --> test !(dst <=s src)
1345 --> test (dst >s src)
1346 --> test (src <s dst) */
1347 return unop(Iop_1Uto64,
1348 binop(Iop_CmpLT32S,
1349 unop(Iop_64to32, cc_dep2),
1350 unop(Iop_64to32, cc_dep1)));
1351
1352 }
1353
1354 /*---------------- SUBW ----------------*/
1355
1356 /* 4, 5 */
1357 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1358 /* word sub/cmp, then Z --> test dst==src */
1359 return unop(Iop_1Uto64,
1360 binop(Iop_CmpEQ16,
1361 unop(Iop_64to16,cc_dep1),
1362 unop(Iop_64to16,cc_dep2)));
1363 }
1364 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1365 /* word sub/cmp, then NZ --> test dst!=src */
1366 return unop(Iop_1Uto64,
1367 binop(Iop_CmpNE16,
1368 unop(Iop_64to16,cc_dep1),
1369 unop(Iop_64to16,cc_dep2)));
1370 }
1371
1372 /* 6, */
1373 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1374 /* word sub/cmp, then BE (unsigned less than or equal)
1375 --> test dst <=u src */
1376 return unop(Iop_1Uto64,
1377 binop(Iop_CmpLE64U,
1378 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1379 binop(Iop_Shl64, cc_dep2, mkU8(48))));
1380 }
1381
1382 /* 8, 9 */
1383 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondS)
1384 && isU64(cc_dep2, 0)) {
1385 /* word sub/cmp of zero, then S --> test (dst-0 <s 0)
1386 --> test dst <s 0
1387 --> (ULong)dst[15]
1388 This is yet another scheme by which clang figures out if the
1389 top bit of a word is 1 or 0. See also LOGICB/CondS below. */
1390 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1391 for a 16-bit comparison, since the args to the helper
1392 function are always U64s. */
1393 return binop(Iop_And64,
1394 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1395 mkU64(1));
1396 }
1397 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNS)
1398 && isU64(cc_dep2, 0)) {
1399 /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1400 --> test !(dst <s 0)
1401 --> (ULong) !dst[15]
1402 */
1403 return binop(Iop_Xor64,
1404 binop(Iop_And64,
1405 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1406 mkU64(1)),
1407 mkU64(1));
1408 }
1409
1410 /* 14, */
1411 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1412 /* word sub/cmp, then LE (signed less than or equal)
1413 --> test dst <=s src */
1414 return unop(Iop_1Uto64,
1415 binop(Iop_CmpLE64S,
1416 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1417 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1418
1419 }
1420
1421 /*---------------- SUBB ----------------*/
1422
1423 /* 2, 3 */
1424 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1425 /* byte sub/cmp, then B (unsigned less than)
1426 --> test dst <u src */
1427 return unop(Iop_1Uto64,
1428 binop(Iop_CmpLT64U,
1429 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1430 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1431 }
1432 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1433 /* byte sub/cmp, then NB (unsigned greater than or equal)
1434 --> test src <=u dst */
1435 /* Note, args are opposite way round from the usual */
1436 return unop(Iop_1Uto64,
1437 binop(Iop_CmpLE64U,
1438 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1439 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1440 }
1441
1442 /* 4, 5 */
1443 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1444 /* byte sub/cmp, then Z --> test dst==src */
1445 return unop(Iop_1Uto64,
1446 binop(Iop_CmpEQ8,
1447 unop(Iop_64to8,cc_dep1),
1448 unop(Iop_64to8,cc_dep2)));
1449 }
1450 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1451 /* byte sub/cmp, then NZ --> test dst!=src */
1452 return unop(Iop_1Uto64,
1453 binop(Iop_CmpNE8,
1454 unop(Iop_64to8,cc_dep1),
1455 unop(Iop_64to8,cc_dep2)));
1456 }
1457
1458 /* 6, */
1459 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1460 /* byte sub/cmp, then BE (unsigned less than or equal)
1461 --> test dst <=u src */
1462 return unop(Iop_1Uto64,
1463 binop(Iop_CmpLE64U,
1464 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1465 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1466 }
1467
1468 /* 8, 9 */
1469 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1470 && isU64(cc_dep2, 0)) {
1471 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1472 --> test dst <s 0
1473 --> (ULong)dst[7]
1474 This is yet another scheme by which gcc figures out if the
1475 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1476 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1477 for an 8-bit comparison, since the args to the helper
1478 function are always U64s. */
1479 return binop(Iop_And64,
1480 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1481 mkU64(1));
1482 }
1483 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1484 && isU64(cc_dep2, 0)) {
1485 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1486 --> test !(dst <s 0)
1487 --> (ULong) !dst[7]
1488 */
1489 return binop(Iop_Xor64,
1490 binop(Iop_And64,
1491 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1492 mkU64(1)),
1493 mkU64(1));
1494 }
1495
1496 /*---------------- LOGICQ ----------------*/
1497
1498 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1499 /* long long and/or/xor, then Z --> test dst==0 */
1500 return unop(Iop_1Uto64,
1501 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1502 }
1503 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1504 /* long long and/or/xor, then NZ --> test dst!=0 */
1505 return unop(Iop_1Uto64,
1506 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1507 }
1508
1509 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1510 /* long long and/or/xor, then L
1511 LOGIC sets SF and ZF according to the
1512 result and makes OF be zero. L computes SF ^ OF, but
1513 OF is zero, so this reduces to SF -- which will be 1 iff
1514 the result is < signed 0. Hence ...
1515 */
1516 return unop(Iop_1Uto64,
1517 binop(Iop_CmpLT64S,
1518 cc_dep1,
1519 mkU64(0)));
1520 }
1521
1522 /*---------------- LOGICL ----------------*/
1523
1524 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1525 /* long and/or/xor, then Z --> test dst==0 */
1526 return unop(Iop_1Uto64,
1527 binop(Iop_CmpEQ32,
1528 unop(Iop_64to32, cc_dep1),
1529 mkU32(0)));
1530 }
1531 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1532 /* long and/or/xor, then NZ --> test dst!=0 */
1533 return unop(Iop_1Uto64,
1534 binop(Iop_CmpNE32,
1535 unop(Iop_64to32, cc_dep1),
1536 mkU32(0)));
1537 }
1538
1539 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1540 /* long and/or/xor, then LE
1541 This is pretty subtle. LOGIC sets SF and ZF according to the
1542 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1543 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1544 the result is <=signed 0. Hence ...
1545 */
1546 return unop(Iop_1Uto64,
1547 binop(Iop_CmpLE32S,
1548 unop(Iop_64to32, cc_dep1),
1549 mkU32(0)));
1550 }
1551
1552 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1553 /* long and/or/xor, then S --> (ULong)result[31] */
1554 return binop(Iop_And64,
1555 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1556 mkU64(1));
1557 }
1558 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1559 /* long and/or/xor, then NS --> (ULong) !result[31] */
1560 return binop(Iop_Xor64,
1561 binop(Iop_And64,
1562 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1563 mkU64(1)),
1564 mkU64(1));
1565 }
1566
1567 /*---------------- LOGICW ----------------*/
1568
1569 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1570 /* word and/or/xor, then Z --> test dst==0 */
1571 return unop(Iop_1Uto64,
1572 binop(Iop_CmpEQ64,
1573 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1574 mkU64(0)));
1575 }
1576 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1577 /* word and/or/xor, then NZ --> test dst!=0 */
1578 return unop(Iop_1Uto64,
1579 binop(Iop_CmpNE64,
1580 binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1581 mkU64(0)));
1582 }
1583
1584 /*---------------- LOGICB ----------------*/
1585
1586 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1587 /* byte and/or/xor, then Z --> test dst==0 */
1588 return unop(Iop_1Uto64,
1589 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1590 mkU64(0)));
1591 }
1592 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1593 /* byte and/or/xor, then NZ --> test dst!=0 */
1594 return unop(Iop_1Uto64,
1595 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1596 mkU64(0)));
1597 }
1598
1599 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1600 /* this is an idiom gcc sometimes uses to find out if the top
1601 bit of a byte register is set: eg testb %al,%al; js ..
1602 Since it just depends on the top bit of the byte, extract
1603 that bit and explicitly get rid of all the rest. This
1604 helps memcheck avoid false positives in the case where any
1605 of the other bits in the byte are undefined. */
1606 /* byte and/or/xor, then S --> (UInt)result[7] */
1607 return binop(Iop_And64,
1608 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1609 mkU64(1));
1610 }
1611 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1612 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1613 return binop(Iop_Xor64,
1614 binop(Iop_And64,
1615 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1616 mkU64(1)),
1617 mkU64(1));
1618 }
1619
1620 /*---------------- INCB ----------------*/
1621
1622 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1623 /* 8-bit inc, then LE --> sign bit of the arg */
1624 return binop(Iop_And64,
1625 binop(Iop_Shr64,
1626 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1627 mkU8(7)),
1628 mkU64(1));
1629 }
1630
1631 /*---------------- INCW ----------------*/
1632
1633 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1634 /* 16-bit inc, then Z --> test dst == 0 */
1635 return unop(Iop_1Uto64,
1636 binop(Iop_CmpEQ64,
1637 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1638 mkU64(0)));
1639 }
1640
1641 /*---------------- DECL ----------------*/
1642
1643 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1644 /* dec L, then Z --> test dst == 0 */
1645 return unop(Iop_1Uto64,
1646 binop(Iop_CmpEQ32,
1647 unop(Iop_64to32, cc_dep1),
1648 mkU32(0)));
1649 }
1650
1651 /*---------------- DECW ----------------*/
1652
1653 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1654 /* 16-bit dec, then NZ --> test dst != 0 */
1655 return unop(Iop_1Uto64,
1656 binop(Iop_CmpNE64,
1657 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1658 mkU64(0)));
1659 }
1660
1661 /*---------------- SHRQ ----------------*/
1662
1663 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) {
1664 /* SHRQ, then Z --> test dep1 == 0 */
1665 return unop(Iop_1Uto64,
1666 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1667 }
1668 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) {
1669 /* SHRQ, then NZ --> test dep1 != 0 */
1670 return unop(Iop_1Uto64,
1671 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1672 }
1673
1674 /*---------------- SHRL ----------------*/
1675
1676 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) {
1677 /* SHRL, then Z --> test dep1 == 0 */
1678 return unop(Iop_1Uto64,
1679 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1680 mkU32(0)));
1681 }
1682 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNZ)) {
1683 /* SHRL, then NZ --> test dep1 != 0 */
1684 return unop(Iop_1Uto64,
1685 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1686 mkU32(0)));
1687 }
1688
1689 /*---------------- COPY ----------------*/
1690 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1691 jbe" for example. */
1692
1693 if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1694 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1695 /* COPY, then BE --> extract C and Z from dep1, and test (C
1696 or Z == 1). */
1697 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1698 or Z == 0). */
1699 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1700 return
1701 unop(
1702 Iop_1Uto64,
1703 binop(
1704 Iop_CmpEQ64,
1705 binop(
1706 Iop_And64,
1707 binop(
1708 Iop_Or64,
1709 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1710 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1711 ),
1712 mkU64(1)
1713 ),
1714 mkU64(nnn)
1715 )
1716 );
1717 }
1718
1719 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1720 /* COPY, then B --> extract C from dep1, and test (C == 1). */
1721 return
1722 unop(
1723 Iop_1Uto64,
1724 binop(
1725 Iop_CmpNE64,
1726 binop(
1727 Iop_And64,
1728 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1729 mkU64(1)
1730 ),
1731 mkU64(0)
1732 )
1733 );
1734 }
1735
1736 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1737 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1738 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1739 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1740 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1741 return
1742 unop(
1743 Iop_1Uto64,
1744 binop(
1745 Iop_CmpEQ64,
1746 binop(
1747 Iop_And64,
1748 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1749 mkU64(1)
1750 ),
1751 mkU64(nnn)
1752 )
1753 );
1754 }
1755
1756 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1757 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1758 return
1759 unop(
1760 Iop_1Uto64,
1761 binop(
1762 Iop_CmpNE64,
1763 binop(
1764 Iop_And64,
1765 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1766 mkU64(1)
1767 ),
1768 mkU64(0)
1769 )
1770 );
1771 }
1772
1773 return NULL;
1774 }
1775
1776 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1777
1778 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1779 /* specialise calls to above "calculate_rflags_c" function */
1780 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1781 vassert(arity == 4);
1782 cc_op = args[0];
1783 cc_dep1 = args[1];
1784 cc_dep2 = args[2];
1785 cc_ndep = args[3];
1786
1787 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1788 /* C after sub denotes unsigned less than */
1789 return unop(Iop_1Uto64,
1790 binop(Iop_CmpLT64U,
1791 cc_dep1,
1792 cc_dep2));
1793 }
1794 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1795 /* C after sub denotes unsigned less than */
1796 return unop(Iop_1Uto64,
1797 binop(Iop_CmpLT32U,
1798 unop(Iop_64to32, cc_dep1),
1799 unop(Iop_64to32, cc_dep2)));
1800 }
1801 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1802 /* C after sub denotes unsigned less than */
1803 return unop(Iop_1Uto64,
1804 binop(Iop_CmpLT64U,
1805 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1806 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1807 }
1808 if (isU64(cc_op, AMD64G_CC_OP_ADDQ)) {
1809 /* C after add denotes sum <u either arg */
1810 return unop(Iop_1Uto64,
1811 binop(Iop_CmpLT64U,
1812 binop(Iop_Add64, cc_dep1, cc_dep2),
1813 cc_dep1));
1814 }
1815 if (isU64(cc_op, AMD64G_CC_OP_ADDL)) {
1816 /* C after add denotes sum <u either arg */
1817 return unop(Iop_1Uto64,
1818 binop(Iop_CmpLT32U,
1819 unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
1820 unop(Iop_64to32, cc_dep1)));
1821 }
1822 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1823 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1824 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1825 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1826 /* cflag after logic is zero */
1827 return mkU64(0);
1828 }
1829 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1830 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1831 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1832 return cc_ndep;
1833 }
1834
1835 # if 0
1836 if (cc_op->tag == Iex_Const) {
1837 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1838 }
1839 # endif
1840
1841 return NULL;
1842 }
1843
1844 # undef unop
1845 # undef binop
1846 # undef mkU64
1847 # undef mkU32
1848 # undef mkU8
1849
1850 return NULL;
1851 }
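
/* The specialisations above are easiest to sanity-check against the
   scalar definition of the flag being computed.  A minimal sketch for
   the SUBQ carry case shown earlier (illustrative only, not part of
   the build; 'subq_carry_ref' is a hypothetical name): the carry after
   a subtract is exactly the unsigned "dst < src" test that the
   specialised IR computes. */
# if 0
static UInt subq_carry_ref ( ULong dst, ULong src )
{
   return dst < src ? 1 : 0;   /* matches CmpLT64U(cc_dep1, cc_dep2) */
}
# endif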
1852
1853
1854 /*---------------------------------------------------------------*/
1855 /*--- Supporting functions for x87 FPU activities. ---*/
1856 /*---------------------------------------------------------------*/
1857
1858 static inline Bool host_is_little_endian ( void )
1859 {
1860 UInt x = 0x76543210;
1861 UChar* p = (UChar*)(&x);
1862 return toBool(*p == 0x10);
1863 }
1864
1865 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1866 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1867 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1868 {
1869 Bool mantissaIsZero;
1870 Int bexp;
1871 UChar sign;
1872 UChar* f64;
1873
1874 vassert(host_is_little_endian());
1875
1876 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1877
1878 f64 = (UChar*)(&dbl);
1879 sign = toUChar( (f64[7] >> 7) & 1 );
1880
1881 /* First off, if the tag indicates the register was empty,
1882 return 1,0,sign,1 */
1883 if (tag == 0) {
1884 /* vex_printf("Empty\n"); */
1885 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1886 | AMD64G_FC_MASK_C0;
1887 }
1888
1889 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1890 bexp &= 0x7FF;
1891
1892 mantissaIsZero
1893 = toBool(
1894 (f64[6] & 0x0F) == 0
1895 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1896 );
1897
1898 /* If both exponent and mantissa are zero, the value is zero.
1899 Return 1,0,sign,0. */
1900 if (bexp == 0 && mantissaIsZero) {
1901 /* vex_printf("Zero\n"); */
1902 return AMD64G_FC_MASK_C3 | 0
1903 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1904 }
1905
1906 /* If exponent is zero but mantissa isn't, it's a denormal.
1907 Return 1,1,sign,0. */
1908 if (bexp == 0 && !mantissaIsZero) {
1909 /* vex_printf("Denormal\n"); */
1910 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1911 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1912 }
1913
1914 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1915 Return 0,1,sign,1. */
1916 if (bexp == 0x7FF && mantissaIsZero) {
1917 /* vex_printf("Inf\n"); */
1918 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1919 | AMD64G_FC_MASK_C0;
1920 }
1921
1922 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1923 Return 0,0,sign,1. */
1924 if (bexp == 0x7FF && !mantissaIsZero) {
1925 /* vex_printf("NaN\n"); */
1926 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1927 }
1928
1929 /* Uh, ok, we give up. It must be a normal finite number.
1930 Return 0,1,sign,0.
1931 */
1932 /* vex_printf("normal\n"); */
1933 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1934 }
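
/* Summary of the C3:C2:C1:C0 patterns produced above:
      empty     1 0 sign 1
      zero      1 0 sign 0
      denormal  1 1 sign 0
      infinity  0 1 sign 1
      NaN       0 0 sign 1
      normal    0 1 sign 0
   A minimal self-check sketch (illustrative only, not part of the
   build; 'fxam_demo' is a hypothetical name): */
# if 0
static void fxam_demo ( void )
{
   union { double d; ULong u; } v;
   v.d = 1.0;
   /* tag=1 (non-empty): expect the "normal" pattern 0,1,0,0. */
   ULong c = amd64g_calculate_FXAM( 1, v.u );
   vassert( ((c >> AMD64G_FC_SHIFT_C1) & 1) == 0 );          /* sign */
   vassert( (c & AMD64G_FC_MASK_C2) != 0 );                  /* C2=1 */
   vassert( (c & (AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C0)) == 0 );
}
# endif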
1935
1936
1937 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1938 appears to differ from the former only in that the 8 FP registers
1939 themselves are not transferred into the guest state. */
1940 static
1941 VexEmNote do_put_x87 ( Bool moveRegs,
1942 /*IN*/Fpu_State* x87_state,
1943 /*OUT*/VexGuestAMD64State* vex_state )
1944 {
1945 Int stno, preg;
1946 UInt tag;
1947 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1948 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1949 UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
1950 UInt tagw = x87_state->env[FP_ENV_TAG];
1951 UInt fpucw = x87_state->env[FP_ENV_CTRL];
1952 UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700;
1953 VexEmNote ew;
1954 UInt fpround;
1955 ULong pair;
1956
1957 /* Copy registers and tags */
1958 for (stno = 0; stno < 8; stno++) {
1959 preg = (stno + ftop) & 7;
1960 tag = (tagw >> (2*preg)) & 3;
1961 if (tag == 3) {
1962 /* register is empty */
1963 /* hmm, if it's empty, does it still get written? Probably
1964 safer to say it does. If we don't, memcheck could get out
1965 of sync, in that it thinks all FP registers are defined by
1966 this helper, but in reality some have not been updated. */
1967 if (moveRegs)
1968 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1969 vexTags[preg] = 0;
1970 } else {
1971 /* register is non-empty */
1972 if (moveRegs)
1973 convert_f80le_to_f64le( &x87_state->reg[10*stno],
1974 (UChar*)&vexRegs[preg] );
1975 vexTags[preg] = 1;
1976 }
1977 }
1978
1979 /* stack pointer */
1980 vex_state->guest_FTOP = ftop;
1981
1982 /* status word */
1983 vex_state->guest_FC3210 = c3210;
1984
1985 /* handle the control word, setting FPROUND and detecting any
1986 emulation warnings. */
1987 pair = amd64g_check_fldcw ( (ULong)fpucw );
1988 fpround = (UInt)pair & 0xFFFFFFFFULL;
1989 ew = (VexEmNote)(pair >> 32);
1990
1991 vex_state->guest_FPROUND = fpround & 3;
1992
1993 /* emulation warnings --> caller */
1994 return ew;
1995 }
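
/* For reference, the x87 status-word layout that do_put_x87 (above)
   and do_get_x87 (below) pick apart: TOP lives in bits 13..11 and the
   condition codes C3,C2,C1,C0 in bits 14,10,9,8, which is why the mask
   0x4700 appears throughout.  A sketch of the two accessors
   (illustrative only, hypothetical names, not part of the build): */
# if 0
static UInt fsw_get_top   ( UShort fsw ) { return (fsw >> 11) & 7; }
static UInt fsw_get_c3210 ( UShort fsw ) { return fsw & 0x4700; }
# endif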
1996
1997
1998 /* Create an x87 FPU state from the guest state, as close as
1999 we can approximate it. */
2000 static
2001 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
2002 /*OUT*/Fpu_State* x87_state )
2003 {
2004 Int i, stno, preg;
2005 UInt tagw;
2006 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2007 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2008 UInt ftop = vex_state->guest_FTOP;
2009 UInt c3210 = vex_state->guest_FC3210;
2010
2011 for (i = 0; i < 14; i++)
2012 x87_state->env[i] = 0;
2013
2014 x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
2015 = x87_state->env[13] = 0xFFFF;
2016 x87_state->env[FP_ENV_STAT]
2017 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2018 x87_state->env[FP_ENV_CTRL]
2019 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2020
2021 /* Dump the register stack in ST order. */
2022 tagw = 0;
2023 for (stno = 0; stno < 8; stno++) {
2024 preg = (stno + ftop) & 7;
2025 if (vexTags[preg] == 0) {
2026 /* register is empty */
2027 tagw |= (3 << (2*preg));
2028 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2029 &x87_state->reg[10*stno] );
2030 } else {
2031 /* register is full. */
2032 tagw |= (0 << (2*preg));
2033 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2034 &x87_state->reg[10*stno] );
2035 }
2036 }
2037 x87_state->env[FP_ENV_TAG] = toUShort(tagw);
2038 }
2039
2040
2041 /*---------------------------------------------------------------*/
2042 /*--- Supporting functions for XSAVE/FXSAVE. ---*/
2043 /*---------------------------------------------------------------*/
2044
2045 /* CALLED FROM GENERATED CODE */
2046 /* DIRTY HELPER (reads guest state, writes guest mem) */
2047 /* XSAVE component 0 is the x87 FPU state. */
2048 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
2049 ( VexGuestAMD64State* gst, HWord addr )
2050 {
2051 /* Derived from values obtained from
2052 vendor_id : AuthenticAMD
2053 cpu family : 15
2054 model : 12
2055 model name : AMD Athlon(tm) 64 Processor 3200+
2056 stepping : 0
2057 cpu MHz : 2200.000
2058 cache size : 512 KB
2059 */
2060 /* Somewhat roundabout, but at least it's simple. */
2061 Fpu_State tmp;
2062 UShort* addrS = (UShort*)addr;
2063 UChar* addrC = (UChar*)addr;
2064 UShort fp_tags;
2065 UInt summary_tags;
2066 Int r, stno;
2067 UShort *srcS, *dstS;
2068
2069 do_get_x87( gst, &tmp );
2070
2071 /* Now build the proper fxsave x87 image from the fsave x87 image
2072 we just made. */
2073
2074 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
2075    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
2076
2077 /* set addrS[2] in an endian-independent way */
2078 summary_tags = 0;
2079 fp_tags = tmp.env[FP_ENV_TAG];
2080 for (r = 0; r < 8; r++) {
2081 if ( ((fp_tags >> (2*r)) & 3) != 3 )
2082 summary_tags |= (1 << r);
2083 }
2084 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
2085 addrC[5] = 0; /* pad */
2086
2087 /* FOP: faulting fpu opcode. From experimentation, the real CPU
2088 does not write this field. (?!) */
2089 addrS[3] = 0; /* BOGUS */
2090
2091 /* RIP (Last x87 instruction pointer). From experimentation, the
2092 real CPU does not write this field. (?!) */
2093 addrS[4] = 0; /* BOGUS */
2094 addrS[5] = 0; /* BOGUS */
2095 addrS[6] = 0; /* BOGUS */
2096 addrS[7] = 0; /* BOGUS */
2097
2098 /* RDP (Last x87 data pointer). From experimentation, the real CPU
2099 does not write this field. (?!) */
2100 addrS[8] = 0; /* BOGUS */
2101 addrS[9] = 0; /* BOGUS */
2102 addrS[10] = 0; /* BOGUS */
2103 addrS[11] = 0; /* BOGUS */
2104
2105 /* addrS[13,12] are MXCSR -- not written */
2106 /* addrS[15,14] are MXCSR_MASK -- not written */
2107
2108 /* Copy in the FP registers, in ST order. */
2109 for (stno = 0; stno < 8; stno++) {
2110 srcS = (UShort*)(&tmp.reg[10*stno]);
2111 dstS = (UShort*)(&addrS[16 + 8*stno]);
2112 dstS[0] = srcS[0];
2113 dstS[1] = srcS[1];
2114 dstS[2] = srcS[2];
2115 dstS[3] = srcS[3];
2116 dstS[4] = srcS[4];
2117 dstS[5] = 0;
2118 dstS[6] = 0;
2119 dstS[7] = 0;
2120 }
2121 }
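
/* The loop above abridges the 16-bit FSAVE tag word into the single
   FTW byte used by the FXSAVE/XSAVE image: one bit per register,
   1 = non-empty (full tag != 3), 0 = empty.  A standalone sketch of
   that mapping (illustrative only; 'abridge_ftw' is a hypothetical
   name, not part of the build): */
# if 0
static UChar abridge_ftw ( UShort full_tagw )
{
   UChar summary = 0;
   Int   r;
   for (r = 0; r < 8; r++)
      if ( ((full_tagw >> (2*r)) & 3) != 3 )
         summary |= (1 << r);
   return summary;
}
# endif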
2122
2123
2124 /* CALLED FROM GENERATED CODE */
2125 /* DIRTY HELPER (reads guest state, writes guest mem) */
2126 /* XSAVE component 1 is the SSE state. */
2127 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2128 ( VexGuestAMD64State* gst, HWord addr )
2129 {
2130 UShort* addrS = (UShort*)addr;
2131 UInt mxcsr;
2132
2133 /* The only non-register parts of the SSE state are MXCSR and
2134 MXCSR_MASK. */
2135 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
2136
2137 addrS[12] = toUShort(mxcsr); /* MXCSR */
2138 addrS[13] = toUShort(mxcsr >> 16);
2139
2140 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
2141 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
2142 }
2143
2144
2145 /* VISIBLE TO LIBVEX CLIENT */
2146 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2147 the result at the given address which represents a buffer of at
2148 least 416 bytes.
2149
2150 This function is not called from generated code. FXSAVE is dealt
2151 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2152 functions above plus some in-line IR. This function is merely a
2153 convenience function for VEX's users.
2154 */
2155 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
2156 /*OUT*/HWord fp_state )
2157 {
2158 /* Do the x87 part */
2159 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
2160
2161 /* And now the SSE part, except for the registers themselves. */
2162 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2163
2164 /* That's the first 160 bytes of the image done. */
2165 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
2166 big-endian, these need to be byte-swapped. */
2167 U128 *xmm = (U128 *)(fp_state + 160);
2168 vassert(host_is_little_endian());
2169
2170 # define COPY_U128(_dst,_src) \
2171 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2172 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2173 while (0)
2174
2175 COPY_U128( xmm[0], gst->guest_YMM0 );
2176 COPY_U128( xmm[1], gst->guest_YMM1 );
2177 COPY_U128( xmm[2], gst->guest_YMM2 );
2178 COPY_U128( xmm[3], gst->guest_YMM3 );
2179 COPY_U128( xmm[4], gst->guest_YMM4 );
2180 COPY_U128( xmm[5], gst->guest_YMM5 );
2181 COPY_U128( xmm[6], gst->guest_YMM6 );
2182 COPY_U128( xmm[7], gst->guest_YMM7 );
2183 COPY_U128( xmm[8], gst->guest_YMM8 );
2184 COPY_U128( xmm[9], gst->guest_YMM9 );
2185 COPY_U128( xmm[10], gst->guest_YMM10 );
2186 COPY_U128( xmm[11], gst->guest_YMM11 );
2187 COPY_U128( xmm[12], gst->guest_YMM12 );
2188 COPY_U128( xmm[13], gst->guest_YMM13 );
2189 COPY_U128( xmm[14], gst->guest_YMM14 );
2190 COPY_U128( xmm[15], gst->guest_YMM15 );
2191 # undef COPY_U128
2192 }
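
/* Example client-side use (illustrative only, not part of the build).
   A real FXSAVE area is 512 bytes and 16-byte aligned; this helper
   only fills the first 416 bytes of it, hence the minimum buffer size
   stated above. */
# if 0
static void example_fxsave ( VexGuestAMD64State* gst )
{
   static ULong buf[512 / 8] __attribute__((aligned(16)));
   LibVEX_GuestAMD64_fxsave( gst, (HWord)&buf[0] );
   /* ... and the symmetric restore: */
   (void) LibVEX_GuestAMD64_fxrstor( (HWord)&buf[0], gst );
}
# endif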
2193
2194
2195 /*---------------------------------------------------------------*/
2196 /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
2197 /*---------------------------------------------------------------*/
2198
2199 /* CALLED FROM GENERATED CODE */
2200 /* DIRTY HELPER (writes guest state, reads guest mem) */
2201 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2202 ( VexGuestAMD64State* gst, HWord addr )
2203 {
2204 Fpu_State tmp;
2205 UShort* addrS = (UShort*)addr;
2206 UChar* addrC = (UChar*)addr;
2207 UShort fp_tags;
2208 Int r, stno, i;
2209
2210 /* Copy the x87 registers out of the image, into a temporary
2211 Fpu_State struct. */
2212 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2213 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2214 /* fill in tmp.reg[0..7] */
2215 for (stno = 0; stno < 8; stno++) {
2216 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2217 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2218 dstS[0] = srcS[0];
2219 dstS[1] = srcS[1];
2220 dstS[2] = srcS[2];
2221 dstS[3] = srcS[3];
2222 dstS[4] = srcS[4];
2223 }
2224 /* fill in tmp.env[0..13] */
2225 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
2226    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2227
2228 fp_tags = 0;
2229 for (r = 0; r < 8; r++) {
2230 if (addrC[4] & (1<<r))
2231          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough; see the sketch below. */
2232 else
2233          fp_tags |= (3 << (2*r)); /* EMPTY */
2234 }
2235 tmp.env[FP_ENV_TAG] = fp_tags;
2236
2237 /* Now write 'tmp' into the guest state. */
2238 VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
2239
2240 return warnX87;
2241 }
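
/* The tag expansion in the loop above is necessarily lossy: the
   abridged FTW byte only records empty vs non-empty, so every
   non-empty register gets the 'valid' encoding 00b even if it really
   held a zero (01b) or special value (10b).  A sketch of the reverse
   mapping (illustrative only; 'expand_ftw' is a hypothetical name, not
   part of the build): */
# if 0
static UShort expand_ftw ( UChar summary )
{
   UShort tagw = 0;
   Int    r;
   for (r = 0; r < 8; r++)
      if (!(summary & (1 << r)))
         tagw |= (3 << (2*r));   /* empty; non-empty stays at 00b */
   return tagw;
}
# endif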
2242
2243
2244 /* CALLED FROM GENERATED CODE */
2245 /* DIRTY HELPER (writes guest state, reads guest mem) */
2246 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2247 ( VexGuestAMD64State* gst, HWord addr )
2248 {
2249 UShort* addrS = (UShort*)addr;
2250 UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2251 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2252 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2253
2254 VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
2255
2256 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2257 return warnXMM;
2258 }
2259
2260
2261 /* VISIBLE TO LIBVEX CLIENT */
2262 /* Do FXRSTOR from the supplied address and write the values read into
2263    the given VexGuestAMD64State structure.
2264
2265 This function is not called from generated code. FXRSTOR is dealt
2266 with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2267 functions above plus some in-line IR. This function is merely a
2268 convenience function for VEX's users.
2269 */
2270 VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
2271 /*MOD*/VexGuestAMD64State* gst )
2272 {
2273 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
2274 to be byte-swapped. */
2275 U128 *xmm = (U128 *)(fp_state + 160);
2276
2277 vassert(host_is_little_endian());
2278
2279 # define COPY_U128(_dst,_src) \
2280 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2281 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2282 while (0)
2283
2284 COPY_U128( gst->guest_YMM0, xmm[0] );
2285 COPY_U128( gst->guest_YMM1, xmm[1] );
2286 COPY_U128( gst->guest_YMM2, xmm[2] );
2287 COPY_U128( gst->guest_YMM3, xmm[3] );
2288 COPY_U128( gst->guest_YMM4, xmm[4] );
2289 COPY_U128( gst->guest_YMM5, xmm[5] );
2290 COPY_U128( gst->guest_YMM6, xmm[6] );
2291 COPY_U128( gst->guest_YMM7, xmm[7] );
2292 COPY_U128( gst->guest_YMM8, xmm[8] );
2293 COPY_U128( gst->guest_YMM9, xmm[9] );
2294 COPY_U128( gst->guest_YMM10, xmm[10] );
2295 COPY_U128( gst->guest_YMM11, xmm[11] );
2296 COPY_U128( gst->guest_YMM12, xmm[12] );
2297 COPY_U128( gst->guest_YMM13, xmm[13] );
2298 COPY_U128( gst->guest_YMM14, xmm[14] );
2299 COPY_U128( gst->guest_YMM15, xmm[15] );
2300
2301 # undef COPY_U128
2302
2303 VexEmNote warnXMM
2304 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2305 VexEmNote warnX87
2306 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
2307
2308 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2309 if (warnX87 != EmNote_NONE)
2310 return warnX87;
2311 else
2312 return warnXMM;
2313 }
2314
2315
2316 /*---------------------------------------------------------------*/
2317 /*--- Supporting functions for FSAVE/FRSTOR ---*/
2318 /*---------------------------------------------------------------*/
2319
2320 /* DIRTY HELPER (writes guest state) */
2321 /* Initialise the x87 FPU state as per 'finit'. */
2322 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2323 {
2324 Int i;
2325 gst->guest_FTOP = 0;
2326 for (i = 0; i < 8; i++) {
2327 gst->guest_FPTAG[i] = 0; /* empty */
2328 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2329 }
2330 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2331 gst->guest_FC3210 = 0;
2332 }
2333
2334
2335 /* CALLED FROM GENERATED CODE */
2336 /* DIRTY HELPER (reads guest memory) */
2337 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2338 {
2339 ULong f64;
2340 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2341 return f64;
2342 }
2343
2344 /* CALLED FROM GENERATED CODE */
2345 /* DIRTY HELPER (writes guest memory) */
2346 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2347 {
2348 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2349 }
2350
2351
2352 /* CALLED FROM GENERATED CODE */
2353 /* CLEAN HELPER */
2354 /* mxcsr[15:0] contains a SSE native format MXCSR value.
2355 Extract from it the required SSEROUND value and any resulting
2356 emulation warning, and return (warn << 32) | sseround value.
2357 */
2358 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2359 {
2360 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2361 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2362 ULong rmode = (mxcsr >> 13) & 3;
2363
2364 /* Detect any required emulation warnings. */
2365 VexEmNote ew = EmNote_NONE;
2366
2367 if ((mxcsr & 0x1F80) != 0x1F80) {
2368 /* unmasked exceptions! */
2369 ew = EmWarn_X86_sseExns;
2370 }
2371 else
2372 if (mxcsr & (1<<15)) {
2373 /* FZ is set */
2374 ew = EmWarn_X86_fz;
2375 }
2376 else
2377 if (mxcsr & (1<<6)) {
2378 /* DAZ is set */
2379 ew = EmWarn_X86_daz;
2380 }
2381
2382 return (((ULong)ew) << 32) | ((ULong)rmode);
2383 }
2384
2385
2386 /* CALLED FROM GENERATED CODE */
2387 /* CLEAN HELPER */
2388 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2389 native format MXCSR value. */
2390 ULong amd64g_create_mxcsr ( ULong sseround )
2391 {
2392 sseround &= 3;
2393 return 0x1F80 | (sseround << 13);
2394 }
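
/* The two MXCSR helpers above are designed to round-trip: a value
   built by amd64g_create_mxcsr has all exceptions masked and FZ/DAZ
   clear, so feeding it back through amd64g_check_ldmxcsr yields no
   emulation warning and the same rounding mode.  A minimal check
   sketch (illustrative only, not part of the build): */
# if 0
static void mxcsr_roundtrip_demo ( void )
{
   ULong r;
   for (r = 0; r < 4; r++) {
      ULong pair = amd64g_check_ldmxcsr( amd64g_create_mxcsr( r ) );
      vassert( (pair >> 32) == (ULong)EmNote_NONE );
      vassert( (pair & 0xFFFFFFFFULL) == r );
   }
}
# endif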
2395
2396
2397 /* CLEAN HELPER */
2398 /* fpucw[15:0] contains a x87 native format FPU control word.
2399 Extract from it the required FPROUND value and any resulting
2400 emulation warning, and return (warn << 32) | fpround value.
2401 */
2402 ULong amd64g_check_fldcw ( ULong fpucw )
2403 {
2404 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2405 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2406 ULong rmode = (fpucw >> 10) & 3;
2407
2408 /* Detect any required emulation warnings. */
2409 VexEmNote ew = EmNote_NONE;
2410
2411 if ((fpucw & 0x3F) != 0x3F) {
2412 /* unmasked exceptions! */
2413 ew = EmWarn_X86_x87exns;
2414 }
2415 else
2416 if (((fpucw >> 8) & 3) != 3) {
2417 /* unsupported precision */
2418 ew = EmWarn_X86_x87precision;
2419 }
2420
2421 return (((ULong)ew) << 32) | ((ULong)rmode);
2422 }
2423
2424
2425 /* CLEAN HELPER */
2426 /* Given fpround as an IRRoundingMode value, create a suitable x87
2427 native format FPU control word. */
2428 ULong amd64g_create_fpucw ( ULong fpround )
2429 {
2430 fpround &= 3;
2431 return 0x037F | (fpround << 10);
2432 }
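
/* The same round-trip property holds for the x87 control-word helpers
   above: 0x037F masks all exceptions and selects 64-bit precision, so
   amd64g_check_fldcw applied to amd64g_create_fpucw(r) reports no
   warning and returns r.  Sketch (illustrative only, not part of the
   build): */
# if 0
static void fpucw_roundtrip_demo ( void )
{
   ULong r;
   for (r = 0; r < 4; r++) {
      ULong pair = amd64g_check_fldcw( amd64g_create_fpucw( r ) );
      vassert( (pair >> 32) == (ULong)EmNote_NONE );
      vassert( (pair & 0xFFFFFFFFULL) == r );
   }
}
# endif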
2433
2434
2435 /* This is used to implement 'fldenv'.
2436 Reads 28 bytes at x87_state[0 .. 27]. */
2437 /* CALLED FROM GENERATED CODE */
2438 /* DIRTY HELPER */
2439 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2440 /*IN*/HWord x87_state)
2441 {
2442 return do_put_x87( False, (Fpu_State*)x87_state, vex_state );
2443 }
2444
2445
2446 /* CALLED FROM GENERATED CODE */
2447 /* DIRTY HELPER */
2448 /* Create an x87 FPU env from the guest state, as close as we can
2449 approximate it. Writes 28 bytes at x87_state[0..27]. */
2450 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2451 /*OUT*/HWord x87_state )
2452 {
2453 Int i, stno, preg;
2454 UInt tagw;
2455 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2456 Fpu_State* x87 = (Fpu_State*)x87_state;
2457 UInt ftop = vex_state->guest_FTOP;
2458 ULong c3210 = vex_state->guest_FC3210;
2459
2460 for (i = 0; i < 14; i++)
2461 x87->env[i] = 0;
2462
2463 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2464 x87->env[FP_ENV_STAT]
2465 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2466 x87->env[FP_ENV_CTRL]
2467 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2468
2469 /* Compute the x87 tag word. */
2470 tagw = 0;
2471 for (stno = 0; stno < 8; stno++) {
2472 preg = (stno + ftop) & 7;
2473 if (vexTags[preg] == 0) {
2474 /* register is empty */
2475 tagw |= (3 << (2*preg));
2476 } else {
2477 /* register is full. */
2478 tagw |= (0 << (2*preg));
2479 }
2480 }
2481 x87->env[FP_ENV_TAG] = toUShort(tagw);
2482
2483    /* We don't dump the x87 registers, though. */
2484 }
2485
2486
2487 /* This is used to implement 'fnsave'.
2488 Writes 108 bytes at x87_state[0 .. 107]. */
2489 /* CALLED FROM GENERATED CODE */
2490 /* DIRTY HELPER */
2491 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2492 /*OUT*/HWord x87_state)
2493 {
2494 do_get_x87( vex_state, (Fpu_State*)x87_state );
2495 }
2496
2497
2498 /* This is used to implement 'fnsaves'.
2499 Writes 94 bytes at x87_state[0 .. 93]. */
2500 /* CALLED FROM GENERATED CODE */
2501 /* DIRTY HELPER */
2502 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2503 /*OUT*/HWord x87_state)
2504 {
2505 Int i, stno, preg;
2506 UInt tagw;
2507 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2508 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2509 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2510 UInt ftop = vex_state->guest_FTOP;
2511 UInt c3210 = vex_state->guest_FC3210;
2512
2513 for (i = 0; i < 7; i++)
2514 x87->env[i] = 0;
2515
2516 x87->env[FPS_ENV_STAT]
2517 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2518 x87->env[FPS_ENV_CTRL]
2519 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2520
2521 /* Dump the register stack in ST order. */
2522 tagw = 0;
2523 for (stno = 0; stno < 8; stno++) {
2524 preg = (stno + ftop) & 7;
2525 if (vexTags[preg] == 0) {
2526 /* register is empty */
2527 tagw |= (3 << (2*preg));
2528 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2529 &x87->reg[10*stno] );
2530 } else {
2531 /* register is full. */
2532 tagw |= (0 << (2*preg));
2533 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2534 &x87->reg[10*stno] );
2535 }
2536 }
2537 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2538 }
2539
2540
2541 /* This is used to implement 'frstor'.
2542 Reads 108 bytes at x87_state[0 .. 107]. */
2543 /* CALLED FROM GENERATED CODE */
2544 /* DIRTY HELPER */
2545 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2546 /*IN*/HWord x87_state)
2547 {
2548 return do_put_x87( True, (Fpu_State*)x87_state, vex_state );
2549 }
2550
2551
2552 /* This is used to implement 'frstors'.
2553 Reads 94 bytes at x87_state[0 .. 93]. */
2554 /* CALLED FROM GENERATED CODE */
2555 /* DIRTY HELPER */
2556 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2557 /*IN*/HWord x87_state)
2558 {
2559 Int stno, preg;
2560 UInt tag;
2561 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2562 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2563 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2564 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2565 UInt tagw = x87->env[FPS_ENV_TAG];
2566 UInt fpucw = x87->env[FPS_ENV_CTRL];
2567 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
2568 VexEmNote ew;
2569 UInt fpround;
2570 ULong pair;
2571
2572 /* Copy registers and tags */
2573 for (stno = 0; stno < 8; stno++) {
2574 preg = (stno + ftop) & 7;
2575 tag = (tagw >> (2*preg)) & 3;
2576 if (tag == 3) {
2577 /* register is empty */
2578 /* hmm, if it's empty, does it still get written? Probably
2579 safer to say it does. If we don't, memcheck could get out
2580 of sync, in that it thinks all FP registers are defined by
2581 this helper, but in reality some have not been updated. */
2582 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2583 vexTags[preg] = 0;
2584 } else {
2585 /* register is non-empty */
2586 convert_f80le_to_f64le( &x87->reg[10*stno],
2587 (UChar*)&vexRegs[preg] );
2588 vexTags[preg] = 1;
2589 }
2590 }
2591
2592 /* stack pointer */
2593 vex_state->guest_FTOP = ftop;
2594
2595 /* status word */
2596 vex_state->guest_FC3210 = c3210;
2597
2598 /* handle the control word, setting FPROUND and detecting any
2599 emulation warnings. */
2600 pair = amd64g_check_fldcw ( (ULong)fpucw );
2601 fpround = (UInt)pair & 0xFFFFFFFFULL;
2602 ew = (VexEmNote)(pair >> 32);
2603
2604 vex_state->guest_FPROUND = fpround & 3;
2605
2606 /* emulation warnings --> caller */
2607 return ew;
2608 }
2609
2610
2611 /*---------------------------------------------------------------*/
2612 /*--- CPUID helpers. ---*/
2613 /*---------------------------------------------------------------*/
2614
2615 /* Claim to be the following CPU, which is probably representative of
2616 the lowliest (earliest) amd64 offerings. It can do neither sse3
2617 nor cx16.
2618
2619 vendor_id : AuthenticAMD
2620 cpu family : 15
2621 model : 5
2622 model name : AMD Opteron (tm) Processor 848
2623 stepping : 10
2624 cpu MHz : 1797.682
2625 cache size : 1024 KB
2626 fpu : yes
2627 fpu_exception : yes
2628 cpuid level : 1
2629 wp : yes
2630 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2631 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2632 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2633 bogomips : 3600.62
2634 TLB size : 1088 4K pages
2635 clflush size : 64
2636 cache_alignment : 64
2637 address sizes : 40 bits physical, 48 bits virtual
2638 power management: ts fid vid ttp
2639
2640 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2641 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2642 and 3dnowext is 80000001.EDX.30.
2643 */
2644 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2645 {
2646 # define SET_ABCD(_a,_b,_c,_d) \
2647 do { st->guest_RAX = (ULong)(_a); \
2648 st->guest_RBX = (ULong)(_b); \
2649 st->guest_RCX = (ULong)(_c); \
2650 st->guest_RDX = (ULong)(_d); \
2651 } while (0)
2652
2653 switch (0xFFFFFFFF & st->guest_RAX) {
2654 case 0x00000000:
2655 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2656 break;
2657 case 0x00000001:
2658 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2659 break;
2660 case 0x80000000:
2661 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2662 break;
2663 case 0x80000001:
2664 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
2665 the original it-is-supported value that the h/w provides.
2666 See #291568. */
2667 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
2668 0x21d3fbff);
2669 break;
2670 case 0x80000002:
2671 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2672 break;
2673 case 0x80000003:
2674 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2675 break;
2676 case 0x80000004:
2677 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2678 break;
2679 case 0x80000005:
2680 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2681 break;
2682 case 0x80000006:
2683 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2684 break;
2685 case 0x80000007:
2686 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2687 break;
2688 case 0x80000008:
2689 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2690 break;
2691 default:
2692 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2693 break;
2694 }
2695 # undef SET_ABCD
2696 }
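
/* The leaf-0 constants above are simply the vendor string packed
   little-endian into EBX, EDX, ECX.  A decoding sketch (illustrative
   only; 'cpuid_vendor_demo' is a hypothetical name, not part of the
   build): */
# if 0
static void cpuid_vendor_demo ( void )
{
   UInt  regs[3] = { 0x68747541, 0x69746e65, 0x444d4163 }; /* EBX,EDX,ECX */
   HChar s[13];
   Int   i;
   for (i = 0; i < 12; i++)
      s[i] = (HChar)( (regs[i/4] >> (8 * (i % 4))) & 0xFF );
   s[12] = 0;   /* s is now "AuthenticAMD" */
}
# endif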
2697
2698
2699 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2700 capable.
2701
2702 vendor_id : GenuineIntel
2703 cpu family : 6
2704 model : 15
2705 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2706 stepping : 6
2707 cpu MHz : 2394.000
2708 cache size : 4096 KB
2709 physical id : 0
2710 siblings : 2
2711 core id : 0
2712 cpu cores : 2
2713 fpu : yes
2714 fpu_exception : yes
2715 cpuid level : 10
2716 wp : yes
2717 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2718 mtrr pge mca cmov pat pse36 clflush dts acpi
2719 mmx fxsr sse sse2 ss ht tm syscall nx lm
2720 constant_tsc pni monitor ds_cpl vmx est tm2
2721 cx16 xtpr lahf_lm
2722 bogomips : 4798.78
2723 clflush size : 64
2724 cache_alignment : 64
2725 address sizes : 36 bits physical, 48 bits virtual
2726 power management:
2727 */
2728 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2729 {
2730 # define SET_ABCD(_a,_b,_c,_d) \
2731 do { st->guest_RAX = (ULong)(_a); \
2732 st->guest_RBX = (ULong)(_b); \
2733 st->guest_RCX = (ULong)(_c); \
2734 st->guest_RDX = (ULong)(_d); \
2735 } while (0)
2736
2737 switch (0xFFFFFFFF & st->guest_RAX) {
2738 case 0x00000000:
2739 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2740 break;
2741 case 0x00000001:
2742 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2743 break;
2744 case 0x00000002:
2745 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2746 break;
2747 case 0x00000003:
2748 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2749 break;
2750 case 0x00000004: {
2751 switch (0xFFFFFFFF & st->guest_RCX) {
2752 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2753 0x0000003f, 0x00000001); break;
2754 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2755 0x0000003f, 0x00000001); break;
2756 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2757 0x00000fff, 0x00000001); break;
2758 default: SET_ABCD(0x00000000, 0x00000000,
2759 0x00000000, 0x00000000); break;
2760 }
2761 break;
2762 }
2763 case 0x00000005:
2764 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2765 break;
2766 case 0x00000006:
2767 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2768 break;
2769 case 0x00000007:
2770 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2771 break;
2772 case 0x00000008:
2773 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2774 break;
2775 case 0x00000009:
2776 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2777 break;
2778 case 0x0000000a:
2779 unhandled_eax_value:
2780 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2781 break;
2782 case 0x80000000:
2783 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2784 break;
2785 case 0x80000001:
2786 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2787 break;
2788 case 0x80000002:
2789 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2790 break;
2791 case 0x80000003:
2792 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2793 break;
2794 case 0x80000004:
2795 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2796 break;
2797 case 0x80000005:
2798 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2799 break;
2800 case 0x80000006:
2801 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2802 break;
2803 case 0x80000007:
2804 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2805 break;
2806 case 0x80000008:
2807 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2808 break;
2809 default:
2810 goto unhandled_eax_value;
2811 }
2812 # undef SET_ABCD
2813 }
2814
2815
2816 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2817 capable.
2818
2819 vendor_id : GenuineIntel
2820 cpu family : 6
2821 model : 37
2822 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
2823 stepping : 2
2824 cpu MHz : 3334.000
2825 cache size : 4096 KB
2826 physical id : 0
2827 siblings : 4
2828 core id : 0
2829 cpu cores : 2
2830 apicid : 0
2831 initial apicid : 0
2832 fpu : yes
2833 fpu_exception : yes
2834 cpuid level : 11
2835 wp : yes
2836 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2837 mtrr pge mca cmov pat pse36 clflush dts acpi
2838 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2839 lm constant_tsc arch_perfmon pebs bts rep_good
2840 xtopology nonstop_tsc aperfmperf pni pclmulqdq
2841 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2842 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2843 arat tpr_shadow vnmi flexpriority ept vpid
2844 bogomips : 6957.57
2845 clflush size : 64
2846 cache_alignment : 64
2847 address sizes : 36 bits physical, 48 bits virtual
2848 power management:
2849 */
2850 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2851 {
2852 # define SET_ABCD(_a,_b,_c,_d) \
2853 do { st->guest_RAX = (ULong)(_a); \
2854 st->guest_RBX = (ULong)(_b); \
2855 st->guest_RCX = (ULong)(_c); \
2856 st->guest_RDX = (ULong)(_d); \
2857 } while (0)
2858
2859 UInt old_eax = (UInt)st->guest_RAX;
2860 UInt old_ecx = (UInt)st->guest_RCX;
2861
2862 switch (old_eax) {
2863 case 0x00000000:
2864 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2865 break;
2866 case 0x00000001:
2867 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
2868 break;
2869 case 0x00000002:
2870 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2871 break;
2872 case 0x00000003:
2873 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2874 break;
2875 case 0x00000004:
2876 switch (old_ecx) {
2877 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2878 0x0000003f, 0x00000000); break;
2879 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2880 0x0000007f, 0x00000000); break;
2881 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2882 0x000001ff, 0x00000000); break;
2883 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2884 0x00000fff, 0x00000002); break;
2885 default: SET_ABCD(0x00000000, 0x00000000,
2886 0x00000000, 0x00000000); break;
2887 }
2888 break;
2889 case 0x00000005:
2890 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2891 break;
2892 case 0x00000006:
2893 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2894 break;
2895 case 0x00000007:
2896 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2897 break;
2898 case 0x00000008:
2899 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2900 break;
2901 case 0x00000009:
2902 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2903 break;
2904 case 0x0000000a:
2905 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2906 break;
2907 case 0x0000000b:
2908 switch (old_ecx) {
2909 case 0x00000000:
2910 SET_ABCD(0x00000001, 0x00000002,
2911 0x00000100, 0x00000000); break;
2912 case 0x00000001:
2913 SET_ABCD(0x00000004, 0x00000004,
2914 0x00000201, 0x00000000); break;
2915 default:
2916 SET_ABCD(0x00000000, 0x00000000,
2917 old_ecx, 0x00000000); break;
2918 }
2919 break;
2920 case 0x0000000c:
2921 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2922 break;
2923 case 0x0000000d:
2924 switch (old_ecx) {
2925 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2926 0x00000100, 0x00000000); break;
2927 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2928 0x00000201, 0x00000000); break;
2929 default: SET_ABCD(0x00000000, 0x00000000,
2930 old_ecx, 0x00000000); break;
2931 }
2932 break;
2933 case 0x80000000:
2934 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2935 break;
2936 case 0x80000001:
2937 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2938 break;
2939 case 0x80000002:
2940 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2941 break;
2942 case 0x80000003:
2943 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2944 break;
2945 case 0x80000004:
2946 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2947 break;
2948 case 0x80000005:
2949 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2950 break;
2951 case 0x80000006:
2952 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2953 break;
2954 case 0x80000007:
2955 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2956 break;
2957 case 0x80000008:
2958 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2959 break;
2960 default:
2961 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2962 break;
2963 }
2964 # undef SET_ABCD
2965 }
2966
2967
2968 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
2969 capable. Plus (kludge!) it "supports" HTM.
2970
2971    Also with the following change: claim that XSaveOpt is not
2972    available, so that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
2973 on the real CPU. Consequently, programs that correctly observe
2974 these CPUID values should only try to use 3 of the 8 XSave-family
2975 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
2976 having to implement the compacted or optimised save/restore
2977 variants.
2978
2979 vendor_id : GenuineIntel
2980 cpu family : 6
2981 model : 42
2982 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2983 stepping : 7
2984 cpu MHz : 1600.000
2985 cache size : 6144 KB
2986 physical id : 0
2987 siblings : 4
2988 core id : 3
2989 cpu cores : 4
2990 apicid : 6
2991 initial apicid : 6
2992 fpu : yes
2993 fpu_exception : yes
2994 cpuid level : 13
2995 wp : yes
2996 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2997 mtrr pge mca cmov pat pse36 clflush dts acpi
2998 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2999 lm constant_tsc arch_perfmon pebs bts rep_good
3000 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
3001 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
3002 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
3003 lahf_lm ida arat epb xsaveopt pln pts dts
3004 tpr_shadow vnmi flexpriority ept vpid
3005
3006 bogomips : 5768.94
3007 clflush size : 64
3008 cache_alignment : 64
3009 address sizes : 36 bits physical, 48 bits virtual
3010 power management:
3011 */
3012 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
3013 {
3014 # define SET_ABCD(_a,_b,_c,_d) \
3015 do { st->guest_RAX = (ULong)(_a); \
3016 st->guest_RBX = (ULong)(_b); \
3017 st->guest_RCX = (ULong)(_c); \
3018 st->guest_RDX = (ULong)(_d); \
3019 } while (0)
3020
3021 UInt old_eax = (UInt)st->guest_RAX;
3022 UInt old_ecx = (UInt)st->guest_RCX;
3023
3024 switch (old_eax) {
3025 case 0x00000000:
3026 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3027 break;
3028 case 0x00000001:
3029 SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
3030 break;
3031 case 0x00000002:
3032 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
3033 break;
3034 case 0x00000003:
3035 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3036 break;
3037 case 0x00000004:
3038 switch (old_ecx) {
3039 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3040 0x0000003f, 0x00000000); break;
3041 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3042 0x0000003f, 0x00000000); break;
3043 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3044 0x000001ff, 0x00000000); break;
3045 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
3046 0x00001fff, 0x00000006); break;
3047 default: SET_ABCD(0x00000000, 0x00000000,
3048 0x00000000, 0x00000000); break;
3049 }
3050 break;
3051 case 0x00000005:
3052 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3053 break;
3054 case 0x00000006:
3055 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3056 break;
3057 case 0x00000007:
3058 SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
3059 break;
3060 case 0x00000008:
3061 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3062 break;
3063 case 0x00000009:
3064 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3065 break;
3066 case 0x0000000a:
3067 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3068 break;
3069 case 0x0000000b:
3070 switch (old_ecx) {
3071 case 0x00000000:
3072 SET_ABCD(0x00000001, 0x00000001,
3073 0x00000100, 0x00000000); break;
3074 case 0x00000001:
3075 SET_ABCD(0x00000004, 0x00000004,
3076 0x00000201, 0x00000000); break;
3077 default:
3078 SET_ABCD(0x00000000, 0x00000000,
3079 old_ecx, 0x00000000); break;
3080 }
3081 break;
3082 case 0x0000000c:
3083 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3084 break;
3085 case 0x0000000d:
3086 switch (old_ecx) {
3087 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3088 0x00000340, 0x00000000); break;
3089 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3090 0x00000000, 0x00000000); break;
3091 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3092 0x00000000, 0x00000000); break;
3093 default: SET_ABCD(0x00000000, 0x00000000,
3094 0x00000000, 0x00000000); break;
3095 }
3096 break;
3097 case 0x0000000e:
3098 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3099 break;
3100 case 0x0000000f:
3101 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3102 break;
3103 case 0x80000000:
3104 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3105 break;
3106 case 0x80000001:
3107 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3108 break;
3109 case 0x80000002:
3110 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3111 break;
3112 case 0x80000003:
3113 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3114 break;
3115 case 0x80000004:
3116 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3117 break;
3118 case 0x80000005:
3119 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3120 break;
3121 case 0x80000006:
3122 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3123 break;
3124 case 0x80000007:
3125 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3126 break;
3127 case 0x80000008:
3128 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3129 break;
3130 default:
3131 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3132 break;
3133 }
3134 # undef SET_ABCD
3135 }
3136
3137
3138 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3139
3140    With the following change: claim that XSaveOpt is not available, so
3141    that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3142 CPU. Consequently, programs that correctly observe these CPUID
3143 values should only try to use 3 of the 8 XSave-family instructions:
3144 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3145 implement the compacted or optimised save/restore variants.
3146
3147 vendor_id : GenuineIntel
3148 cpu family : 6
3149 model : 60
3150 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3151 stepping : 3
3152 microcode : 0x1c
3153 cpu MHz : 919.957
3154 cache size : 8192 KB
3155 physical id : 0
3156 siblings : 4
3157 core id : 3
3158 cpu cores : 4
3159 apicid : 6
3160 initial apicid : 6
3161 fpu : yes
3162 fpu_exception : yes
3163 cpuid level : 13
3164 wp : yes
3165 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3166 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3167 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3168 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3169 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3170 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3171 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3172 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3173 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3174 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3175 bugs :
3176 bogomips : 5786.68
3177 clflush size : 64
3178 cache_alignment : 64
3179 address sizes : 39 bits physical, 48 bits virtual
3180 power management:
3181 */
3182 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st )
3183 {
3184 # define SET_ABCD(_a,_b,_c,_d) \
3185 do { st->guest_RAX = (ULong)(_a); \
3186 st->guest_RBX = (ULong)(_b); \
3187 st->guest_RCX = (ULong)(_c); \
3188 st->guest_RDX = (ULong)(_d); \
3189 } while (0)
3190
3191 UInt old_eax = (UInt)st->guest_RAX;
3192 UInt old_ecx = (UInt)st->guest_RCX;
3193
3194 switch (old_eax) {
3195 case 0x00000000:
3196 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3197 break;
3198 case 0x00000001:
3199 /* Don't advertise RDRAND support, bit 30 in ECX. */
3200 SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff);
3201 break;
3202 case 0x00000002:
3203 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3204 break;
3205 case 0x00000003:
3206 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3207 break;
3208 case 0x00000004:
3209 switch (old_ecx) {
3210 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3211 0x0000003f, 0x00000000); break;
3212 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3213 0x0000003f, 0x00000000); break;
3214 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3215 0x000001ff, 0x00000000); break;
3216 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3217 0x00001fff, 0x00000006); break;
3218 default: SET_ABCD(0x00000000, 0x00000000,
3219 0x00000000, 0x00000000); break;
3220 }
3221 break;
3222 case 0x00000005:
3223 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3224 break;
3225 case 0x00000006:
3226 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3227 break;
3228 case 0x00000007:
3229 switch (old_ecx) {
3230 case 0x00000000: SET_ABCD(0x00000000, 0x000027ab,
3231 0x00000000, 0x00000000); break;
3232 default: SET_ABCD(0x00000000, 0x00000000,
3233 0x00000000, 0x00000000); break;
3234 }
3235 break;
3236 case 0x00000008:
3237 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3238 break;
3239 case 0x00000009:
3240 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3241 break;
3242 case 0x0000000a:
3243 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3244 break;
3245 case 0x0000000b:
3246 switch (old_ecx) {
3247 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3248 0x00000100, 0x00000002); break;
3249 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3250 0x00000201, 0x00000002); break;
3251 default: SET_ABCD(0x00000000, 0x00000000,
3252 old_ecx, 0x00000002); break;
3253 }
3254 break;
3255 case 0x0000000c:
3256 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3257 break;
3258 case 0x0000000d:
3259 switch (old_ecx) {
3260 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3261 0x00000340, 0x00000000); break;
3262 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3263 0x00000000, 0x00000000); break;
3264 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3265 0x00000000, 0x00000000); break;
3266 default: SET_ABCD(0x00000000, 0x00000000,
3267 0x00000000, 0x00000000); break;
3268 }
3269 break;
3270 case 0x80000000:
3271 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3272 break;
3273 case 0x80000001:
3274 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3275 break;
3276 case 0x80000002:
3277 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3278 break;
3279 case 0x80000003:
3280 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3281 break;
3282 case 0x80000004:
3283 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3284 break;
3285 case 0x80000005:
3286 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3287 break;
3288 case 0x80000006:
3289 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3290 break;
3291 case 0x80000007:
3292 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3293 break;
3294 case 0x80000008:
3295 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3296 break;
3297 default:
3298 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3299 break;
3300 }
3301 # undef SET_ABCD
3302 }
3303
3304
3305 /*---------------------------------------------------------------*/
3306 /*--- Misc integer helpers, including rotates and crypto. ---*/
3307 /*---------------------------------------------------------------*/
3308
3309 ULong amd64g_calculate_RCR ( ULong arg,
3310 ULong rot_amt,
3311 ULong rflags_in,
3312 Long szIN )
3313 {
3314 Bool wantRflags = toBool(szIN < 0);
3315 ULong sz = wantRflags ? (-szIN) : szIN;
3316 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3317 ULong cf=0, of=0, tempcf;
3318
3319 switch (sz) {
3320 case 8:
3321 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3322 of = ((arg >> 63) ^ cf) & 1;
3323 while (tempCOUNT > 0) {
3324 tempcf = arg & 1;
3325 arg = (arg >> 1) | (cf << 63);
3326 cf = tempcf;
3327 tempCOUNT--;
3328 }
3329 break;
3330 case 4:
3331 while (tempCOUNT >= 33) tempCOUNT -= 33;
3332 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3333 of = ((arg >> 31) ^ cf) & 1;
3334 while (tempCOUNT > 0) {
3335 tempcf = arg & 1;
3336 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
3337 cf = tempcf;
3338 tempCOUNT--;
3339 }
3340 break;
3341 case 2:
3342 while (tempCOUNT >= 17) tempCOUNT -= 17;
3343 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3344 of = ((arg >> 15) ^ cf) & 1;
3345 while (tempCOUNT > 0) {
3346 tempcf = arg & 1;
3347 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
3348 cf = tempcf;
3349 tempCOUNT--;
3350 }
3351 break;
3352 case 1:
3353 while (tempCOUNT >= 9) tempCOUNT -= 9;
3354 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3355 of = ((arg >> 7) ^ cf) & 1;
3356 while (tempCOUNT > 0) {
3357 tempcf = arg & 1;
3358 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
3359 cf = tempcf;
3360 tempCOUNT--;
3361 }
3362 break;
3363 default:
3364 vpanic("calculate_RCR(amd64g): invalid size");
3365 }
3366
3367 cf &= 1;
3368 of &= 1;
3369 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3370 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3371
3372 /* caller can ask to have back either the resulting flags or
3373 resulting value, but not both */
3374 return wantRflags ? rflags_in : arg;
3375 }
3376
3377 ULong amd64g_calculate_RCL ( ULong arg,
3378 ULong rot_amt,
3379 ULong rflags_in,
3380 Long szIN )
3381 {
3382 Bool wantRflags = toBool(szIN < 0);
3383 ULong sz = wantRflags ? (-szIN) : szIN;
3384 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3385 ULong cf=0, of=0, tempcf;
3386
3387 switch (sz) {
3388 case 8:
3389 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3390 while (tempCOUNT > 0) {
3391 tempcf = (arg >> 63) & 1;
3392 arg = (arg << 1) | (cf & 1);
3393 cf = tempcf;
3394 tempCOUNT--;
3395 }
3396 of = ((arg >> 63) ^ cf) & 1;
3397 break;
3398 case 4:
3399 while (tempCOUNT >= 33) tempCOUNT -= 33;
3400 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3401 while (tempCOUNT > 0) {
3402 tempcf = (arg >> 31) & 1;
3403 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
3404 cf = tempcf;
3405 tempCOUNT--;
3406 }
3407 of = ((arg >> 31) ^ cf) & 1;
3408 break;
3409 case 2:
3410 while (tempCOUNT >= 17) tempCOUNT -= 17;
3411 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3412 while (tempCOUNT > 0) {
3413 tempcf = (arg >> 15) & 1;
3414 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
3415 cf = tempcf;
3416 tempCOUNT--;
3417 }
3418 of = ((arg >> 15) ^ cf) & 1;
3419 break;
3420 case 1:
3421 while (tempCOUNT >= 9) tempCOUNT -= 9;
3422 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3423 while (tempCOUNT > 0) {
3424 tempcf = (arg >> 7) & 1;
3425 arg = 0xFFULL & ((arg << 1) | (cf & 1));
3426 cf = tempcf;
3427 tempCOUNT--;
3428 }
3429 of = ((arg >> 7) ^ cf) & 1;
3430 break;
3431 default:
3432 vpanic("calculate_RCL(amd64g): invalid size");
3433 }
3434
3435 cf &= 1;
3436 of &= 1;
3437 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3438 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3439
3440 return wantRflags ? rflags_in : arg;
3441 }
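
/* The size argument of the two rotate helpers above doubles as a
   result selector: pass the operand size to get the rotated value
   back, or its negation to get the updated rflags instead.  A small
   usage sketch (illustrative only; 'rcr_demo' is a hypothetical name,
   not part of the build): */
# if 0
static void rcr_demo ( void )
{
   /* rcr-by-1 of the value 1, CF initially 0, 8-byte operand size */
   ULong val    = amd64g_calculate_RCR( 1, 1, 0,  8 );
   ULong rflags = amd64g_calculate_RCR( 1, 1, 0, -8 );
   vassert( val == 0 );                          /* bit 0 rotated out */
   vassert( (rflags & AMD64G_CC_MASK_C) != 0 );  /* ... and into CF   */
}
# endif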
3442
3443 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3444 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3445 */
3446 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3447 {
3448 ULong hi, lo, tmp, A[16];
3449
3450 A[0] = 0; A[1] = a;
3451 A[2] = A[1] << 1; A[3] = A[2] ^ a;
3452 A[4] = A[2] << 1; A[5] = A[4] ^ a;
3453 A[6] = A[3] << 1; A[7] = A[6] ^ a;
3454 A[8] = A[4] << 1; A[9] = A[8] ^ a;
3455 A[10] = A[5] << 1; A[11] = A[10] ^ a;
3456 A[12] = A[6] << 1; A[13] = A[12] ^ a;
3457 A[14] = A[7] << 1; A[15] = A[14] ^ a;
3458
3459 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3460 hi = lo >> 56;
3461 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3462 hi = (hi << 8) | (lo >> 56);
3463 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3464 hi = (hi << 8) | (lo >> 56);
3465 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3466 hi = (hi << 8) | (lo >> 56);
3467 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3468 hi = (hi << 8) | (lo >> 56);
3469 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3470 hi = (hi << 8) | (lo >> 56);
3471 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3472 hi = (hi << 8) | (lo >> 56);
3473 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3474
3475 ULong m0 = -1;
3476 m0 /= 255;
3477 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3478 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3479 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3480 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3481 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3482 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3483 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3484
3485 return which ? hi : lo;
3486 }
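
/* Carry-less multiplication treats both operands as polynomials over
   GF(2), so partial products combine with XOR rather than ADD.  A tiny
   sanity check (illustrative only):
      amd64g_calculate_pclmul(3, 3, 0) == 5
   because (x+1)*(x+1) = x^2 + 1, i.e. 0b11 clmul 0b11 = 0b101.  A
   nonzero 'which' selects the high 64 bits of the 128-bit product
   instead (0 here). */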
3487
3488
3489 /* CALLED FROM GENERATED CODE */
3490 /* DIRTY HELPER (non-referentially-transparent) */
3491 /* Horrible hack. On non-amd64 platforms, return 1. */
3492 ULong amd64g_dirtyhelper_RDTSC ( void )
3493 {
3494 # if defined(__x86_64__)
3495 UInt eax, edx;
3496 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3497 return (((ULong)edx) << 32) | ((ULong)eax);
3498 # else
3499 return 1ULL;
3500 # endif
3501 }
3502
3503 /* CALLED FROM GENERATED CODE */
3504 /* DIRTY HELPER (non-referentially-transparent) */
3505 /* Horrible hack. On non-amd64 platforms, return 1. */
3506 /* This uses a different calling convention from _RDTSC just above
3507 only because of the difficulty of returning 96 bits from a C
3508 function -- RDTSC returns 64 bits and so is simple by comparison,
3509 on amd64. */
3510 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3511 {
3512 # if defined(__x86_64__)
3513 UInt eax, ecx, edx;
3514 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3515 st->guest_RAX = (ULong)eax;
3516 st->guest_RCX = (ULong)ecx;
3517 st->guest_RDX = (ULong)edx;
3518 # else
3519 /* Do nothing. */
3520 # endif
3521 }
3522
3523 /* CALLED FROM GENERATED CODE */
3524 /* DIRTY HELPER (non-referentially-transparent) */
3525 /* Horrible hack. On non-amd64 platforms, return 0. */
3526 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3527 {
3528 # if defined(__x86_64__)
3529 ULong r = 0;
3530 portno &= 0xFFFF;
3531 switch (sz) {
3532 case 4:
3533 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3534 : "=a" (r) : "Nd" (portno));
3535 break;
3536 case 2:
3537 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3538 : "=a" (r) : "Nd" (portno));
3539 break;
3540 case 1:
3541 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3542 : "=a" (r) : "Nd" (portno));
3543 break;
3544 default:
3545 break; /* note: no 64-bit version of insn exists */
3546 }
3547 return r;
3548 # else
3549 return 0;
3550 # endif
3551 }
3552
3553
3554 /* CALLED FROM GENERATED CODE */
3555 /* DIRTY HELPER (non-referentially-transparent) */
3556 /* Horrible hack. On non-amd64 platforms, do nothing. */
3557 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3558 {
3559 # if defined(__x86_64__)
3560 portno &= 0xFFFF;
3561 switch (sz) {
3562 case 4:
3563 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3564 : : "a" (data), "Nd" (portno));
3565 break;
3566 case 2:
3567 __asm__ __volatile__("outw %w0, %w1"
3568 : : "a" (data), "Nd" (portno));
3569 break;
3570 case 1:
3571 __asm__ __volatile__("outb %b0, %w1"
3572 : : "a" (data), "Nd" (portno));
3573 break;
3574 default:
3575 break; /* note: no 64-bit version of insn exists */
3576 }
3577 # else
3578 /* do nothing */
3579 # endif
3580 }
3581
3582 /* CALLED FROM GENERATED CODE */
3583 /* DIRTY HELPER (non-referentially-transparent) */
3584 /* Horrible hack. On non-amd64 platforms, just write a zeroed descriptor. */
3585 /* op = 0: call the native SGDT instruction.
3586 op = 1: call the native SIDT instruction.
3587 */
3588 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3589 # if defined(__x86_64__)
3590 switch (op) {
3591 case 0:
3592 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3593 break;
3594 case 1:
3595 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3596 break;
3597 default:
3598 vpanic("amd64g_dirtyhelper_SxDT");
3599 }
3600 # else
3601    /* Not amd64: fake the result by zeroing the 10-byte descriptor. */
3602 UChar* p = (UChar*)address;
3603 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3604 p[6] = p[7] = p[8] = p[9] = 0;
3605 # endif
3606 }
3607
3608 /*---------------------------------------------------------------*/
3609 /*--- Helpers for MMX/SSE/SSE2. ---*/
3610 /*---------------------------------------------------------------*/
3611
3612 static inline UChar abdU8 ( UChar xx, UChar yy ) {
3613 return toUChar(xx>yy ? xx-yy : yy-xx);
3614 }
3615
3616 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3617 return (((ULong)w1) << 32) | ((ULong)w0);
3618 }
3619
3620 static inline UShort sel16x4_3 ( ULong w64 ) {
3621 UInt hi32 = toUInt(w64 >> 32);
3622 return toUShort(hi32 >> 16);
3623 }
3624 static inline UShort sel16x4_2 ( ULong w64 ) {
3625 UInt hi32 = toUInt(w64 >> 32);
3626 return toUShort(hi32);
3627 }
3628 static inline UShort sel16x4_1 ( ULong w64 ) {
3629 UInt lo32 = toUInt(w64);
3630 return toUShort(lo32 >> 16);
3631 }
3632 static inline UShort sel16x4_0 ( ULong w64 ) {
3633 UInt lo32 = toUInt(w64);
3634 return toUShort(lo32);
3635 }
3636
3637 static inline UChar sel8x8_7 ( ULong w64 ) {
3638 UInt hi32 = toUInt(w64 >> 32);
3639 return toUChar(hi32 >> 24);
3640 }
3641 static inline UChar sel8x8_6 ( ULong w64 ) {
3642 UInt hi32 = toUInt(w64 >> 32);
3643 return toUChar(hi32 >> 16);
3644 }
3645 static inline UChar sel8x8_5 ( ULong w64 ) {
3646 UInt hi32 = toUInt(w64 >> 32);
3647 return toUChar(hi32 >> 8);
3648 }
3649 static inline UChar sel8x8_4 ( ULong w64 ) {
3650 UInt hi32 = toUInt(w64 >> 32);
3651 return toUChar(hi32 >> 0);
3652 }
3653 static inline UChar sel8x8_3 ( ULong w64 ) {
3654 UInt lo32 = toUInt(w64);
3655 return toUChar(lo32 >> 24);
3656 }
3657 static inline UChar sel8x8_2 ( ULong w64 ) {
3658 UInt lo32 = toUInt(w64);
3659 return toUChar(lo32 >> 16);
3660 }
3661 static inline UChar sel8x8_1 ( ULong w64 ) {
3662 UInt lo32 = toUInt(w64);
3663 return toUChar(lo32 >> 8);
3664 }
3665 static inline UChar sel8x8_0 ( ULong w64 ) {
3666 UInt lo32 = toUInt(w64);
3667 return toUChar(lo32 >> 0);
3668 }
3669
3670 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3671 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3672 {
3673 return
3674 mk32x2(
3675 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3676 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3677 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3678 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3679 );
3680 }
3681
3682 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3683 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3684 {
3685 UInt t = 0;
3686 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3687 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3688 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3689 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3690 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3691 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3692 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3693 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3694 t &= 0xFFFF;
3695 return (ULong)t;
3696 }
3697
3698 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3699 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3700 {
3701 UShort t, min;
3702 UInt idx;
3703 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
3704 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3705 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3706 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3707 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3708 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3709 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3710 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3711 return ((ULong)(idx << 16)) | ((ULong)min);
3712 }
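
/* The result packs the minimum unsigned 16-bit lane into bits 15:0 and
   its lane index (0..7) into bits 18:16.  For example (illustrative
   only), if the smallest value 3 lives in lane 1, the helper returns
   (1 << 16) | 3 = 0x10003. */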
3713
3714 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3715 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3716 {
3717 UInt i;
3718 ULong crc = (b & 0xFFULL) ^ crcIn;
3719 for (i = 0; i < 8; i++)
3720 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3721 return crc;
3722 }
3723
3724 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3725 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3726 {
3727 UInt i;
3728 ULong crc = (w & 0xFFFFULL) ^ crcIn;
3729 for (i = 0; i < 16; i++)
3730 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3731 return crc;
3732 }
3733
3734 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3735 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3736 {
3737 UInt i;
3738 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3739 for (i = 0; i < 32; i++)
3740 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3741 return crc;
3742 }
3743
3744 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3745 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3746 {
3747 ULong crc = amd64g_calc_crc32l(crcIn, q);
3748 return amd64g_calc_crc32l(crc, q >> 32);
3749 }
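
/* The polynomial 0x82f63b78 used above is the bit-reflected form of the
   CRC32-C (Castagnoli) polynomial 0x1EDC6F41, which is what the SSE4.2
   CRC32 instruction computes.  As an illustrative sketch only -- the
   helpers neither pre- nor post-invert, and 'buf'/'len' are hypothetical
   -- a caller could checksum a buffer byte by byte like this:

      ULong crc = 0xFFFFFFFFULL;           // conventional initial value
      for (ULong i = 0; i < len; i++)
         crc = amd64g_calc_crc32b(crc, buf[i]);
      crc ^= 0xFFFFFFFFULL;                // conventional final inversion

   The initial value and final inversion are conventions of the CRC32-C
   checksum itself, not something the guest instruction performs. */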
3750
3751
3752 /* .. helper for next fn .. */
3753 static inline ULong sad_8x4 ( ULong xx, ULong yy )
3754 {
3755 UInt t = 0;
3756 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3757 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3758 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3759 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3760 return (ULong)t;
3761 }
3762
3763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3764 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
3765 ULong dHi, ULong dLo,
3766 ULong imm_and_return_control_bit )
3767 {
3768 UInt imm8 = imm_and_return_control_bit & 7;
3769 Bool calcHi = (imm_and_return_control_bit >> 7) & 1;
3770 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
3771 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
3772 /* For src we only need 32 bits, so get them into the
3773 lower half of a 64 bit word. */
3774 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
3775 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
3776 11 bytes. If calculating the low part of the result, need bytes
3777 dstOffsL * 4 + (0 .. 6); if calculating the high part,
3778 dstOffsL * 4 + (4 .. 10). */
3779 ULong dst;
3780 /* dstOffL = 0, Lo -> 0 .. 6
3781 dstOffL = 1, Lo -> 4 .. 10
3782 dstOffL = 0, Hi -> 4 .. 10
3783 dstOffL = 1, Hi -> 8 .. 14
3784 */
3785 if (calcHi && dstOffsL) {
3786 /* 8 .. 14 */
3787 dst = dHi & 0x00FFFFFFFFFFFFFFULL;
3788 }
3789 else if (!calcHi && !dstOffsL) {
3790 /* 0 .. 6 */
3791 dst = dLo & 0x00FFFFFFFFFFFFFFULL;
3792 }
3793 else {
3794 /* 4 .. 10 */
3795 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
3796 }
3797 ULong r0 = sad_8x4( dst >> 0, src );
3798 ULong r1 = sad_8x4( dst >> 8, src );
3799 ULong r2 = sad_8x4( dst >> 16, src );
3800 ULong r3 = sad_8x4( dst >> 24, src );
3801 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
3802 return res;
3803 }
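
/* Example decode of the control byte (illustrative only): with the low
   three immediate bits equal to 5 (binary 101), srcOffsL = 1 and
   dstOffsL = 1, so 'src' is the upper 32 bits of sLo; and when
   computing the low half of the result (calcHi = 0), 'dst' is bytes
   4..10 of the destination, i.e. the "4 .. 10" case above. */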
3804
3805 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3806 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3807 {
3808 ULong dst = 0;
3809 ULong src_bit;
3810 ULong dst_bit = 1;
3811 for (src_bit = 1; src_bit; src_bit <<= 1) {
3812 if (mask & src_bit) {
3813 if (src_masked & src_bit) dst |= dst_bit;
3814 dst_bit <<= 1;
3815 }
3816 }
3817 return dst;
3818 }
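
/* Worked example (illustrative only):
      amd64g_calculate_pext(0x08 /*0b01000*/, 0x1A /*0b11010*/) == 0x2
   The mask selects bit positions 1, 3 and 4, at which the (already
   masked) source holds 0, 1 and 0; packing those bits densely at the
   bottom gives 0b010. */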
3819
3820 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3821 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3822 {
3823 ULong dst = 0;
3824 ULong dst_bit;
3825 ULong src_bit = 1;
3826 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3827 if (mask & dst_bit) {
3828 if (src & src_bit) dst |= dst_bit;
3829 src_bit <<= 1;
3830 }
3831 }
3832 return dst;
3833 }
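
/* Worked example (illustrative only), the inverse of the PEXT case above:
      amd64g_calculate_pdep(0x2 /*0b010*/, 0x1A /*0b11010*/) == 0x8
   The low bits of src (0, 1, 0) are scattered, in order, to the mask's
   set positions 1, 3 and 4, so only bit 3 of the result is set. */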
3834
3835 /*---------------------------------------------------------------*/
3836 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
3837 /*---------------------------------------------------------------*/
3838
3839 static UInt zmask_from_V128 ( V128* arg )
3840 {
3841 UInt i, res = 0;
3842 for (i = 0; i < 16; i++) {
3843 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
3844 }
3845 return res;
3846 }
3847
3848 static UInt zmask_from_V128_wide ( V128* arg )
3849 {
3850 UInt i, res = 0;
3851 for (i = 0; i < 8; i++) {
3852 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
3853 }
3854 return res;
3855 }
3856
3857 /* Helps with PCMP{I,E}STR{I,M}.
3858
3859    CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (Not really -- it
3860    could be a clean helper, except that we can't pass two V128s by
3861    value to a clean helper, nor have one returned.)
3862 Reads guest state, writes to guest state for the xSTRM cases, no
3863 accesses of memory, is a pure function.
3864
3865 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3866 the callee knows which I/E and I/M variant it is dealing with and
3867 what the specific operation is. 4th byte of opcode is in the range
3868 0x60 to 0x63:
3869 istri 66 0F 3A 63
3870 istrm 66 0F 3A 62
3871 estri 66 0F 3A 61
3872 estrm 66 0F 3A 60
3873
3874 gstOffL and gstOffR are the guest state offsets for the two XMM
3875 register inputs. We never have to deal with the memory case since
3876 that is handled by pre-loading the relevant value into the fake
3877 XMM16 register.
3878
3879 For ESTRx variants, edxIN and eaxIN hold the values of those two
3880 registers.
3881
3882 In all cases, the bottom 16 bits of the result contain the new
3883 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
3884 result hold the new %ecx value. For xSTRM variants, the helper
3885 writes the result directly to the guest XMM0.
3886
3887 Declarable side effects: in all cases, reads guest state at
3888 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
3889 guest_XMM0.
3890
3891 Is expected to be called with opc_and_imm combinations which have
3892 actually been validated, and will assert if otherwise. The front
3893 end should ensure we're only called with verified values.
3894 */
3895 ULong amd64g_dirtyhelper_PCMPxSTRx (
3896 VexGuestAMD64State* gst,
3897 HWord opc4_and_imm,
3898 HWord gstOffL, HWord gstOffR,
3899 HWord edxIN, HWord eaxIN
3900 )
3901 {
3902 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
3903 HWord imm8 = opc4_and_imm & 0xFF;
3904 HWord isISTRx = opc4 & 2;
3905 HWord isxSTRM = (opc4 & 1) ^ 1;
3906 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
3907 HWord wide = (imm8 & 1);
3908
3909 // where the args are
3910 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3911 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3912
3913 /* Create the arg validity masks, either from the vectors
3914 themselves or from the supplied edx/eax values. */
3915    // Both the 8-bit and 16-bit element cases are handled below,
3916    // selected by bit 0 of imm8 (the 'wide' flag).
3917 UInt zmaskL, zmaskR;
3918
3919 // temp spot for the resulting flags and vector.
3920 V128 resV;
3921 UInt resOSZACP;
3922
3923 // for checking whether case was handled
3924 Bool ok = False;
3925
3926 if (wide) {
3927 if (isISTRx) {
3928 zmaskL = zmask_from_V128_wide(argL);
3929 zmaskR = zmask_from_V128_wide(argR);
3930 } else {
3931 Int tmp;
3932 tmp = edxIN & 0xFFFFFFFF;
3933 if (tmp < -8) tmp = -8;
3934 if (tmp > 8) tmp = 8;
3935 if (tmp < 0) tmp = -tmp;
3936 vassert(tmp >= 0 && tmp <= 8);
3937 zmaskL = (1 << tmp) & 0xFF;
3938 tmp = eaxIN & 0xFFFFFFFF;
3939 if (tmp < -8) tmp = -8;
3940 if (tmp > 8) tmp = 8;
3941 if (tmp < 0) tmp = -tmp;
3942 vassert(tmp >= 0 && tmp <= 8);
3943 zmaskR = (1 << tmp) & 0xFF;
3944 }
3945       // do the math
3946 ok = compute_PCMPxSTRx_wide (
3947 &resV, &resOSZACP, argL, argR,
3948 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3949 );
3950 } else {
3951 if (isISTRx) {
3952 zmaskL = zmask_from_V128(argL);
3953 zmaskR = zmask_from_V128(argR);
3954 } else {
3955 Int tmp;
3956 tmp = edxIN & 0xFFFFFFFF;
3957 if (tmp < -16) tmp = -16;
3958 if (tmp > 16) tmp = 16;
3959 if (tmp < 0) tmp = -tmp;
3960 vassert(tmp >= 0 && tmp <= 16);
3961 zmaskL = (1 << tmp) & 0xFFFF;
3962 tmp = eaxIN & 0xFFFFFFFF;
3963 if (tmp < -16) tmp = -16;
3964 if (tmp > 16) tmp = 16;
3965 if (tmp < 0) tmp = -tmp;
3966 vassert(tmp >= 0 && tmp <= 16);
3967 zmaskR = (1 << tmp) & 0xFFFF;
3968 }
3969       // do the math
3970 ok = compute_PCMPxSTRx (
3971 &resV, &resOSZACP, argL, argR,
3972 zmaskL, zmaskR, imm8, (Bool)isxSTRM
3973 );
3974 }
3975
3976 // front end shouldn't pass us any imm8 variants we can't
3977 // handle. Hence:
3978 vassert(ok);
3979
3980 // So, finally we need to get the results back to the caller.
3981 // In all cases, the new OSZACP value is the lowest 16 of
3982 // the return value.
3983 if (isxSTRM) {
3984 gst->guest_YMM0[0] = resV.w32[0];
3985 gst->guest_YMM0[1] = resV.w32[1];
3986 gst->guest_YMM0[2] = resV.w32[2];
3987 gst->guest_YMM0[3] = resV.w32[3];
3988 return resOSZACP & 0x8D5;
3989 } else {
3990 UInt newECX = resV.w32[0] & 0xFFFF;
3991 return (newECX << 16) | (resOSZACP & 0x8D5);
3992 }
3993 }
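
/* Sketch of how a caller might unpack the return value for an xSTRI
   variant (illustrative only; 'offL', 'offR', 'edx', 'eax' are
   hypothetical):

      ULong r      = amd64g_dirtyhelper_PCMPxSTRx(gst, opc4_and_imm,
                                                  offL, offR, edx, eax);
      UInt  newECX = (UInt)(r >> 16) & 0xFFFF;  // new %ecx value
      UInt  oszacp = (UInt)r & 0x8D5;           // O,S,Z,A,C,P bits

   For the xSTRM variants only the low 16 bits are meaningful, since the
   vector result has already been written to guest_YMM0[0..3] above. */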
3994
3995 /*---------------------------------------------------------------*/
3996 /*--- AES primitives and helpers ---*/
3997 /*---------------------------------------------------------------*/
3998 /* a 16 x 16 matrix */
3999 static const UChar sbox[256] = { // row nr
4000 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
4001 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
4002 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
4003 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
4004 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
4005 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
4006 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
4007 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
4008 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
4009 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
4010 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
4011 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
4012 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
4013 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
4014 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
4015 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
4016 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
4017 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
4018 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
4019 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
4020 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
4021 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
4022 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
4023 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
4024 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
4025 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
4026 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
4027 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
4028 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
4029 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
4030 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
4031 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
4032 };
4033 static void SubBytes (V128* v)
4034 {
4035 V128 r;
4036 UInt i;
4037 for (i = 0; i < 16; i++)
4038 r.w8[i] = sbox[v->w8[i]];
4039 *v = r;
4040 }
4041
4042 /* a 16 x 16 matrix */
4043 static const UChar invsbox[256] = { // row nr
4044 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
4045 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
4046 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
4047 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
4048 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
4049 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
4050 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
4051 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
4052 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
4053 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
4054 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
4055 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
4056 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
4057 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
4058 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
4059 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
4060 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
4061 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
4062 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
4063 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
4064 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
4065 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
4066 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
4067 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
4068 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
4069 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
4070 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
4071 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
4072 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
4073 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
4074 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
4075 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
4076 };
4077 static void InvSubBytes (V128* v)
4078 {
4079 V128 r;
4080 UInt i;
4081 for (i = 0; i < 16; i++)
4082 r.w8[i] = invsbox[v->w8[i]];
4083 *v = r;
4084 }
4085
4086 static const UChar ShiftRows_op[16] =
4087 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
4088 static void ShiftRows (V128* v)
4089 {
4090 V128 r;
4091 UInt i;
4092 for (i = 0; i < 16; i++)
4093 r.w8[i] = v->w8[ShiftRows_op[15-i]];
4094 *v = r;
4095 }
4096
4097 static const UChar InvShiftRows_op[16] =
4098 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4099 static void InvShiftRows (V128* v)
4100 {
4101 V128 r;
4102 UInt i;
4103 for (i = 0; i < 16; i++)
4104 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
4105 *v = r;
4106 }
4107
4108 /* Multiplication of the finite fields elements of AES.
4109 See "A Specification for The AES Algorithm Rijndael
4110 (by Joan Daemen & Vincent Rijmen)"
4111 Dr. Brian Gladman, v3.1, 3rd March 2001. */
4112 /* N values so that (hex) xy = 0x03^N.
4113 0x00 cannot be used. We put 0xff for this value.*/
4114 /* a 16 x 16 matrix */
4115 static const UChar Nxy[256] = { // row nr
4116 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4117 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4118 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4119 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4120 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4121 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4122 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4123 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4124 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4125 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4126 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4127 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4128 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4129 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4130 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4131 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4132 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4133 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4134 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4135 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4136 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4137 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4138 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4139 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4140 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4141 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4142 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4143 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4144 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4145 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4146 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4147 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4148 };
4149
4150 /* E values so that E = 0x03^xy. */
4151 static const UChar Exy[256] = { // row nr
4152 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4153 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4154 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4155 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4156 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4157 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4158 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4159 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4160 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4161 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4162 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4163 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4164 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4165 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4166 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4167 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4168 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4169 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4170 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4171 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4172 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4173 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4174 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4175 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4176 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4177 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4178 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4179 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4180 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4181 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4182 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4183 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4184
4185 static inline UChar ff_mul(UChar u1, UChar u2)
4186 {
4187 if ((u1 > 0) && (u2 > 0)) {
4188 UInt ui = Nxy[u1] + Nxy[u2];
4189 if (ui >= 255)
4190 ui = ui - 255;
4191 return Exy[ui];
4192 } else {
4193 return 0;
4194 };
4195 }
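
/* Worked example (illustrative only): ff_mul(0x02, 0x03).  The log
   table gives Nxy[0x02] = 0x19 and Nxy[0x03] = 0x01; the exponents sum
   to 0x1a, and the antilog table gives Exy[0x1a] = 0x06 -- which is
   indeed 0x02 * 0x03 in the AES field GF(2^8). */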
4196
4197 static void MixColumns (V128* v)
4198 {
4199 V128 r;
4200 Int j;
4201 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4202 for (j = 0; j < 4; j++) {
4203 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
4204 ^ P(v,j,2) ^ P(v,j,3);
4205 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
4206 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
4207 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
4208 ^ ff_mul(0x03, P(v,j,3) );
4209 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
4210 ^ ff_mul( 0x02, P(v,j,3) );
4211 }
4212 *v = r;
4213 #undef P
4214 }
4215
4216 static void InvMixColumns (V128* v)
4217 {
4218 V128 r;
4219 Int j;
4220 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4221 for (j = 0; j < 4; j++) {
4222 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
4223 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
4224 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
4225 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
4226 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
4227 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
4228 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
4229 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
4230 }
4231 *v = r;
4232 #undef P
4233
4234 }
4235
4236 /* For description, see definition in guest_amd64_defs.h */
4237 void amd64g_dirtyhelper_AES (
4238 VexGuestAMD64State* gst,
4239 HWord opc4, HWord gstOffD,
4240 HWord gstOffL, HWord gstOffR
4241 )
4242 {
4243 // where the args are
4244 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
4245 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4246 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4247 V128 r;
4248
4249 switch (opc4) {
4250 case 0xDC: /* AESENC */
4251 case 0xDD: /* AESENCLAST */
4252 r = *argR;
4253 ShiftRows (&r);
4254 SubBytes (&r);
4255 if (opc4 == 0xDC)
4256 MixColumns (&r);
4257 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4258 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4259 break;
4260
4261 case 0xDE: /* AESDEC */
4262 case 0xDF: /* AESDECLAST */
4263 r = *argR;
4264 InvShiftRows (&r);
4265 InvSubBytes (&r);
4266 if (opc4 == 0xDE)
4267 InvMixColumns (&r);
4268 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4269 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4270 break;
4271
4272 case 0xDB: /* AESIMC */
4273 *argD = *argL;
4274 InvMixColumns (argD);
4275 break;
4276 default: vassert(0);
4277 }
4278 }
4279
4280 static inline UInt RotWord (UInt w32)
4281 {
4282 return ((w32 >> 8) | (w32 << 24));
4283 }
4284
4285 static inline UInt SubWord (UInt w32)
4286 {
4287 UChar *w8;
4288 UChar *r8;
4289 UInt res;
4290 w8 = (UChar*) &w32;
4291 r8 = (UChar*) &res;
4292 r8[0] = sbox[w8[0]];
4293 r8[1] = sbox[w8[1]];
4294 r8[2] = sbox[w8[2]];
4295 r8[3] = sbox[w8[3]];
4296 return res;
4297 }
4298
4299 /* For description, see definition in guest_amd64_defs.h */
4300 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4301 VexGuestAMD64State* gst,
4302 HWord imm8,
4303 HWord gstOffL, HWord gstOffR
4304 )
4305 {
4306 // where the args are
4307 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4308 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4309
4310 // We have to create the result in a temporary in the
4311 // case where the src and dst regs are the same. See #341698.
4312 V128 tmp;
4313
4314 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
4315 tmp.w32[2] = SubWord (argL->w32[3]);
4316 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
4317 tmp.w32[0] = SubWord (argL->w32[1]);
4318
4319 argR->w32[3] = tmp.w32[3];
4320 argR->w32[2] = tmp.w32[2];
4321 argR->w32[1] = tmp.w32[1];
4322 argR->w32[0] = tmp.w32[0];
4323 }
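
/* For reference, the lane layout produced above follows the AES key
   expansion step: w32[0] = SubWord(X1), w32[1] = RotWord(SubWord(X1))
   ^ RCON, w32[2] = SubWord(X3), w32[3] = RotWord(SubWord(X3)) ^ RCON,
   where X1 and X3 are dword lanes 1 and 3 of the source and RCON is the
   immediate byte. */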
4324
4325
4326
4327 /*---------------------------------------------------------------*/
4328 /*--- Helpers for dealing with, and describing, ---*/
4329 /*--- guest state as a whole. ---*/
4330 /*---------------------------------------------------------------*/
4331
4332 /* Initialise the entire amd64 guest state. */
4333 /* VISIBLE TO LIBVEX CLIENT */
4334 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
4335 {
4336 vex_state->host_EvC_FAILADDR = 0;
4337 vex_state->host_EvC_COUNTER = 0;
4338 vex_state->pad0 = 0;
4339
4340 vex_state->guest_RAX = 0;
4341 vex_state->guest_RCX = 0;
4342 vex_state->guest_RDX = 0;
4343 vex_state->guest_RBX = 0;
4344 vex_state->guest_RSP = 0;
4345 vex_state->guest_RBP = 0;
4346 vex_state->guest_RSI = 0;
4347 vex_state->guest_RDI = 0;
4348 vex_state->guest_R8 = 0;
4349 vex_state->guest_R9 = 0;
4350 vex_state->guest_R10 = 0;
4351 vex_state->guest_R11 = 0;
4352 vex_state->guest_R12 = 0;
4353 vex_state->guest_R13 = 0;
4354 vex_state->guest_R14 = 0;
4355 vex_state->guest_R15 = 0;
4356
4357 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
4358 vex_state->guest_CC_DEP1 = 0;
4359 vex_state->guest_CC_DEP2 = 0;
4360 vex_state->guest_CC_NDEP = 0;
4361
4362 vex_state->guest_DFLAG = 1; /* forwards */
4363 vex_state->guest_IDFLAG = 0;
4364 vex_state->guest_ACFLAG = 0;
4365
4366 /* HACK: represent the offset associated with a constant %fs.
4367 Typically, on linux, this assumes that %fs is only ever zero (main
4368 thread) or 0x63. */
4369 vex_state->guest_FS_CONST = 0;
4370
4371 vex_state->guest_RIP = 0;
4372
4373 /* Initialise the simulated FPU */
4374 amd64g_dirtyhelper_FINIT( vex_state );
4375
4376 /* Initialise the AVX state. */
4377 # define AVXZERO(_ymm) \
4378 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4379 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4380 } while (0)
4381 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
4382 AVXZERO(vex_state->guest_YMM0);
4383 AVXZERO(vex_state->guest_YMM1);
4384 AVXZERO(vex_state->guest_YMM2);
4385 AVXZERO(vex_state->guest_YMM3);
4386 AVXZERO(vex_state->guest_YMM4);
4387 AVXZERO(vex_state->guest_YMM5);
4388 AVXZERO(vex_state->guest_YMM6);
4389 AVXZERO(vex_state->guest_YMM7);
4390 AVXZERO(vex_state->guest_YMM8);
4391 AVXZERO(vex_state->guest_YMM9);
4392 AVXZERO(vex_state->guest_YMM10);
4393 AVXZERO(vex_state->guest_YMM11);
4394 AVXZERO(vex_state->guest_YMM12);
4395 AVXZERO(vex_state->guest_YMM13);
4396 AVXZERO(vex_state->guest_YMM14);
4397 AVXZERO(vex_state->guest_YMM15);
4398 AVXZERO(vex_state->guest_YMM16);
4399
4400 # undef AVXZERO
4401
4402 vex_state->guest_EMNOTE = EmNote_NONE;
4403
4404    /* These should never be either read or written, but we
4405       initialise them anyway. */
4406 vex_state->guest_CMSTART = 0;
4407 vex_state->guest_CMLEN = 0;
4408
4409 vex_state->guest_NRADDR = 0;
4410 vex_state->guest_SC_CLASS = 0;
4411 vex_state->guest_GS_CONST = 0;
4412
4413 vex_state->guest_IP_AT_SYSCALL = 0;
4414 vex_state->pad1 = 0;
4415 }
4416
4417
4418 /* Figure out if any part of the guest state contained in minoff
4419 .. maxoff requires precise memory exceptions. If in doubt return
4420 True (but this generates significantly slower code).
4421
4422 By default we enforce precise exns for guest %RSP, %RBP and %RIP
4423 only. These are the minimum needed to extract correct stack
4424 backtraces from amd64 code.
4425
4426 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4427 */
4428 Bool guest_amd64_state_requires_precise_mem_exns (
4429 Int minoff, Int maxoff, VexRegisterUpdates pxControl
4430 )
4431 {
4432 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
4433 Int rbp_max = rbp_min + 8 - 1;
4434 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
4435 Int rsp_max = rsp_min + 8 - 1;
4436 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4437 Int rip_max = rip_min + 8 - 1;
4438
4439 if (maxoff < rsp_min || minoff > rsp_max) {
4440 /* no overlap with rsp */
4441 if (pxControl == VexRegUpdSpAtMemAccess)
4442 return False; // We only need to check stack pointer.
4443 } else {
4444 return True;
4445 }
4446
4447 if (maxoff < rbp_min || minoff > rbp_max) {
4448 /* no overlap with rbp */
4449 } else {
4450 return True;
4451 }
4452
4453 if (maxoff < rip_min || minoff > rip_max) {
4454       /* no overlap with rip */
4455 } else {
4456 return True;
4457 }
4458
4459 return False;
4460 }
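
/* Example (illustrative only): a range covering only guest_RAX overlaps
   none of %RSP/%RBP/%RIP and so returns False in every mode, whereas
   any range touching guest_RSP returns True regardless of the
   pxControl setting. */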
4461
4462
4463 #define ALWAYSDEFD(field) \
4464 { offsetof(VexGuestAMD64State, field), \
4465 (sizeof ((VexGuestAMD64State*)0)->field) }
4466
4467 VexGuestLayout
4468 amd64guest_layout
4469 = {
4470 /* Total size of the guest state, in bytes. */
4471 .total_sizeB = sizeof(VexGuestAMD64State),
4472
4473 /* Describe the stack pointer. */
4474 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4475 .sizeof_SP = 8,
4476
4477 /* Describe the frame pointer. */
4478 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4479 .sizeof_FP = 8,
4480
4481 /* Describe the instruction pointer. */
4482 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4483 .sizeof_IP = 8,
4484
4485 /* Describe any sections to be regarded by Memcheck as
4486 'always-defined'. */
4487 .n_alwaysDefd = 16,
4488
4489 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4490 and DEP2 have to be tracked. See detailed comment in
4491 gdefs.h on meaning of thunk fields. */
4492 .alwaysDefd
4493 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
4494 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
4495 /* 2 */ ALWAYSDEFD(guest_DFLAG),
4496 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
4497 /* 4 */ ALWAYSDEFD(guest_RIP),
4498 /* 5 */ ALWAYSDEFD(guest_FS_CONST),
4499 /* 6 */ ALWAYSDEFD(guest_FTOP),
4500 /* 7 */ ALWAYSDEFD(guest_FPTAG),
4501 /* 8 */ ALWAYSDEFD(guest_FPROUND),
4502 /* 9 */ ALWAYSDEFD(guest_FC3210),
4503 // /* */ ALWAYSDEFD(guest_CS),
4504 // /* */ ALWAYSDEFD(guest_DS),
4505 // /* */ ALWAYSDEFD(guest_ES),
4506 // /* */ ALWAYSDEFD(guest_FS),
4507 // /* */ ALWAYSDEFD(guest_GS),
4508 // /* */ ALWAYSDEFD(guest_SS),
4509 // /* */ ALWAYSDEFD(guest_LDT),
4510 // /* */ ALWAYSDEFD(guest_GDT),
4511 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4512 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4513 /* 12 */ ALWAYSDEFD(guest_CMSTART),
4514 /* 13 */ ALWAYSDEFD(guest_CMLEN),
4515 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4516 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4517 }
4518 };
4519
4520
4521 /*---------------------------------------------------------------*/
4522 /*--- end guest_amd64_helpers.c ---*/
4523 /*---------------------------------------------------------------*/
4524