1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2011 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emwarn.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "guest_generic_bb_to_IR.h"
44 #include "guest_amd64_defs.h"
45 #include "guest_generic_x87.h"
46
47
48 /* This file contains helper functions for amd64 guest code.
49 Calls to these functions are generated by the back end.
50 These calls are of course in the host machine code and
51 this file will be compiled to host machine code, so that
52 all makes sense.
53
54 Only change the signatures of these helper functions very
55 carefully. If you change the signature here, you'll have to change
56 the parameters passed to it in the IR calls constructed by
57 guest-amd64/toIR.c.
58
59 The convention used is that all functions called from generated
60 code are named amd64g_<something>, and any function whose name lacks
61 that prefix is not called from generated code. Note that some
62 LibVEX_* functions can however be called by VEX's client, but that
63 is not the same as calling them from VEX-generated code.
64 */
65
66
67 /* Set to 1 to get detailed profiling info about use of the flag
68 machinery. */
69 #define PROFILE_RFLAGS 0
70
71
72 /*---------------------------------------------------------------*/
73 /*--- %rflags run-time helpers. ---*/
74 /*---------------------------------------------------------------*/
75
76 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
77 after imulq/mulq. */
78
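/* Both routines below compute the full 128-bit product by splitting
   each 64-bit operand into 32-bit halves and summing the partial
   products (schoolbook long multiplication).  Only the upper 64 bits
   need this treatment; the lower 64 bits are just u * v truncated to
   64 bits.  The signed variant keeps the high halves signed so that
   the upper 64 bits come out right for negative operands. */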
79 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
80 {
81 ULong u0, v0, w0;
82 Long u1, v1, w1, w2, t;
83 u0 = u & 0xFFFFFFFFULL;
84 u1 = u >> 32;
85 v0 = v & 0xFFFFFFFFULL;
86 v1 = v >> 32;
87 w0 = u0 * v0;
88 t = u1 * v0 + (w0 >> 32);
89 w1 = t & 0xFFFFFFFFULL;
90 w2 = t >> 32;
91 w1 = u0 * v1 + w1;
92 *rHi = u1 * v1 + w2 + (w1 >> 32);
93 *rLo = u * v;
94 }
95
96 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
97 {
98 ULong u0, v0, w0;
99 ULong u1, v1, w1, w2, t;
100 u0 = u & 0xFFFFFFFFULL;
101 u1 = u >> 32;
102 v0 = v & 0xFFFFFFFFULL;
103 v1 = v >> 32;
104 w0 = u0 * v0;
105 t = u1 * v0 + (w0 >> 32);
106 w1 = t & 0xFFFFFFFFULL;
107 w2 = t >> 32;
108 w1 = u0 * v1 + w1;
109 *rHi = u1 * v1 + w2 + (w1 >> 32);
110 *rLo = u * v;
111 }
112
113
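/* Lookup table for the parity flag: entry i is AMD64G_CC_MASK_P if
   the byte value i contains an even number of 1-bits, and 0
   otherwise.  PF only ever depends on the low 8 bits of a result,
   which is why results are narrowed with (UChar) before indexing. */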
114 static const UChar parity_table[256] = {
115 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
116 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
119 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
120 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
127 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
128 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
131 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
132 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
135 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
136 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
143 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
144 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
147 };
148
149 /* generalised left-shifter */
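/* A negative shift count shifts right by -n instead; the flag
   helpers below rely on this to position bits correctly for any
   DATA_BITS. */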
150 static inline Long lshift ( Long x, Int n )
151 {
152 if (n >= 0)
153 return x << n;
154 else
155 return x >> (-n);
156 }
157
158 /* identity on ULong */
159 static inline ULong idULong ( ULong x )
160 {
161 return x;
162 }
163
164
165 #define PREAMBLE(__data_bits) \
166 /* const */ ULong DATA_MASK \
167 = __data_bits==8 \
168 ? 0xFFULL \
169 : (__data_bits==16 \
170 ? 0xFFFFULL \
171 : (__data_bits==32 \
172 ? 0xFFFFFFFFULL \
173 : 0xFFFFFFFFFFFFFFFFULL)); \
174 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
175 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
176 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
177 /* const */ ULong CC_NDEP = cc_ndep_formal; \
178 /* Four bogus assignments, which hopefully gcc can */ \
179 /* optimise away, and which stop it complaining about */ \
180 /* unused variables. */ \
181 SIGN_MASK = SIGN_MASK; \
182 DATA_MASK = DATA_MASK; \
183 CC_DEP2 = CC_DEP2; \
184 CC_NDEP = CC_NDEP;
185
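/* The ACTIONS_* macros below all follow the lazy-flags thunk
   convention: CC_OP records which operation last set the flags,
   CC_DEP1/CC_DEP2 hold the data the flags depend on (operands or
   result, depending on the operation), and CC_NDEP carries any extra
   state the result alone does not determine, such as the old carry
   bit for ADC/SBB/INC/DEC or the old flags for ROL/ROR. */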
186
187 /*-------------------------------------------------------------*/
188
189 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
190 { \
191 PREAMBLE(DATA_BITS); \
192 { Long cf, pf, af, zf, sf, of; \
193 Long argL, argR, res; \
194 argL = CC_DEP1; \
195 argR = CC_DEP2; \
196 res = argL + argR; \
197 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
198 pf = parity_table[(UChar)res]; \
199 af = (res ^ argL ^ argR) & 0x10; \
200 zf = ((DATA_UTYPE)res == 0) << 6; \
201 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
202 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
203 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
204 return cf | pf | af | zf | sf | of; \
205 } \
206 }
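/* Worked example for ACTIONS_ADD(8, UChar): adding 0xFF and 0x01
   gives res = 0x100, so CF=1 (the truncated result wraps below argL),
   ZF=1 (the low 8 bits are zero), AF=1 (carry out of bit 3), PF=1,
   and SF=OF=0 -- exactly what an 8-bit ADD sets in %rflags. */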
207
208 /*-------------------------------------------------------------*/
209
210 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
211 { \
212 PREAMBLE(DATA_BITS); \
213 { Long cf, pf, af, zf, sf, of; \
214 Long argL, argR, res; \
215 argL = CC_DEP1; \
216 argR = CC_DEP2; \
217 res = argL - argR; \
218 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
219 pf = parity_table[(UChar)res]; \
220 af = (res ^ argL ^ argR) & 0x10; \
221 zf = ((DATA_UTYPE)res == 0) << 6; \
222 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
223 of = lshift((argL ^ argR) & (argL ^ res), \
224 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
225 return cf | pf | af | zf | sf | of; \
226 } \
227 }
228
229 /*-------------------------------------------------------------*/
230
231 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
232 { \
233 PREAMBLE(DATA_BITS); \
234 { Long cf, pf, af, zf, sf, of; \
235 Long argL, argR, oldC, res; \
236 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
237 argL = CC_DEP1; \
238 argR = CC_DEP2 ^ oldC; \
239 res = (argL + argR) + oldC; \
240 if (oldC) \
241 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
242 else \
243 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
244 pf = parity_table[(UChar)res]; \
245 af = (res ^ argL ^ argR) & 0x10; \
246 zf = ((DATA_UTYPE)res == 0) << 6; \
247 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
248 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
249 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
250 return cf | pf | af | zf | sf | of; \
251 } \
252 }
253
254 /*-------------------------------------------------------------*/
255
256 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
257 { \
258 PREAMBLE(DATA_BITS); \
259 { Long cf, pf, af, zf, sf, of; \
260 Long argL, argR, oldC, res; \
261 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
262 argL = CC_DEP1; \
263 argR = CC_DEP2 ^ oldC; \
264 res = (argL - argR) - oldC; \
265 if (oldC) \
266 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
267 else \
268 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
269 pf = parity_table[(UChar)res]; \
270 af = (res ^ argL ^ argR) & 0x10; \
271 zf = ((DATA_UTYPE)res == 0) << 6; \
272 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
273 of = lshift((argL ^ argR) & (argL ^ res), \
274 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
275 return cf | pf | af | zf | sf | of; \
276 } \
277 }
278
279 /*-------------------------------------------------------------*/
280
281 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
282 { \
283 PREAMBLE(DATA_BITS); \
284 { Long cf, pf, af, zf, sf, of; \
285 cf = 0; \
286 pf = parity_table[(UChar)CC_DEP1]; \
287 af = 0; \
288 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
289 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
290 of = 0; \
291 return cf | pf | af | zf | sf | of; \
292 } \
293 }
294
295 /*-------------------------------------------------------------*/
296
297 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
298 { \
299 PREAMBLE(DATA_BITS); \
300 { Long cf, pf, af, zf, sf, of; \
301 Long argL, argR, res; \
302 res = CC_DEP1; \
303 argL = res - 1; \
304 argR = 1; \
305 cf = CC_NDEP & AMD64G_CC_MASK_C; \
306 pf = parity_table[(UChar)res]; \
307 af = (res ^ argL ^ argR) & 0x10; \
308 zf = ((DATA_UTYPE)res == 0) << 6; \
309 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
310 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
311 return cf | pf | af | zf | sf | of; \
312 } \
313 }
314
315 /*-------------------------------------------------------------*/
316
317 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
318 { \
319 PREAMBLE(DATA_BITS); \
320 { Long cf, pf, af, zf, sf, of; \
321 Long argL, argR, res; \
322 res = CC_DEP1; \
323 argL = res + 1; \
324 argR = 1; \
325 cf = CC_NDEP & AMD64G_CC_MASK_C; \
326 pf = parity_table[(UChar)res]; \
327 af = (res ^ argL ^ argR) & 0x10; \
328 zf = ((DATA_UTYPE)res == 0) << 6; \
329 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
330 of = ((res & DATA_MASK) \
331 == ((ULong)SIGN_MASK - 1)) << 11; \
332 return cf | pf | af | zf | sf | of; \
333 } \
334 }
335
336 /*-------------------------------------------------------------*/
337
338 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
339 { \
340 PREAMBLE(DATA_BITS); \
341 { Long cf, pf, af, zf, sf, of; \
342 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
343 pf = parity_table[(UChar)CC_DEP1]; \
344 af = 0; /* undefined */ \
345 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
346 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
347 /* of is defined if shift count == 1 */ \
348 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
349 & AMD64G_CC_MASK_O; \
350 return cf | pf | af | zf | sf | of; \
351 } \
352 }
353
354 /*-------------------------------------------------------------*/
355
356 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
357 { \
358 PREAMBLE(DATA_BITS); \
359 { Long cf, pf, af, zf, sf, of; \
360 cf = CC_DEP2 & 1; \
361 pf = parity_table[(UChar)CC_DEP1]; \
362 af = 0; /* undefined */ \
363 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
364 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
365 /* of is defined if shift count == 1 */ \
366 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
367 & AMD64G_CC_MASK_O; \
368 return cf | pf | af | zf | sf | of; \
369 } \
370 }
371
372 /*-------------------------------------------------------------*/
373
374 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
375 /* DEP1 = result, NDEP = old flags */
376 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
377 { \
378 PREAMBLE(DATA_BITS); \
379 { Long fl \
380 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
381 | (AMD64G_CC_MASK_C & CC_DEP1) \
382 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
383 11-(DATA_BITS-1)) \
384 ^ lshift(CC_DEP1, 11))); \
385 return fl; \
386 } \
387 }
388
389 /*-------------------------------------------------------------*/
390
391 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
392 /* DEP1 = result, NDEP = old flags */
393 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
394 { \
395 PREAMBLE(DATA_BITS); \
396 { Long fl \
397 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
398 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
399 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
400 11-(DATA_BITS-1)) \
401 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
402 return fl; \
403 } \
404 }
405
406 /*-------------------------------------------------------------*/
407
408 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
409 DATA_U2TYPE, NARROWto2U) \
410 { \
411 PREAMBLE(DATA_BITS); \
412 { Long cf, pf, af, zf, sf, of; \
413 DATA_UTYPE hi; \
414 DATA_UTYPE lo \
415 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
416 * ((DATA_UTYPE)CC_DEP2) ); \
417 DATA_U2TYPE rr \
418 = NARROWto2U( \
419 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
420 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
421 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
422 cf = (hi != 0); \
423 pf = parity_table[(UChar)lo]; \
424 af = 0; /* undefined */ \
425 zf = (lo == 0) << 6; \
426 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
427 of = cf << 11; \
428 return cf | pf | af | zf | sf | of; \
429 } \
430 }
431
432 /*-------------------------------------------------------------*/
433
434 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
435 DATA_S2TYPE, NARROWto2S) \
436 { \
437 PREAMBLE(DATA_BITS); \
438 { Long cf, pf, af, zf, sf, of; \
439 DATA_STYPE hi; \
440 DATA_STYPE lo \
441 = NARROWtoS( ((DATA_STYPE)CC_DEP1) \
442 * ((DATA_STYPE)CC_DEP2) ); \
443 DATA_S2TYPE rr \
444 = NARROWto2S( \
445 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
446 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
447 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
448 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
449 pf = parity_table[(UChar)lo]; \
450 af = 0; /* undefined */ \
451 zf = (lo == 0) << 6; \
452 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
453 of = cf << 11; \
454 return cf | pf | af | zf | sf | of; \
455 } \
456 }
457
458 /*-------------------------------------------------------------*/
459
460 #define ACTIONS_UMULQ \
461 { \
462 PREAMBLE(64); \
463 { Long cf, pf, af, zf, sf, of; \
464 ULong lo, hi; \
465 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
466 cf = (hi != 0); \
467 pf = parity_table[(UChar)lo]; \
468 af = 0; /* undefined */ \
469 zf = (lo == 0) << 6; \
470 sf = lshift(lo, 8 - 64) & 0x80; \
471 of = cf << 11; \
472 return cf | pf | af | zf | sf | of; \
473 } \
474 }
475
476 /*-------------------------------------------------------------*/
477
478 #define ACTIONS_SMULQ \
479 { \
480 PREAMBLE(64); \
481 { Long cf, pf, af, zf, sf, of; \
482 Long lo, hi; \
483 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
484 cf = (hi != (lo >>/*s*/ (64-1))); \
485 pf = parity_table[(UChar)lo]; \
486 af = 0; /* undefined */ \
487 zf = (lo == 0) << 6; \
488 sf = lshift(lo, 8 - 64) & 0x80; \
489 of = cf << 11; \
490 return cf | pf | af | zf | sf | of; \
491 } \
492 }
493
494
495 #if PROFILE_RFLAGS
496
497 static Bool initted = False;
498
499 /* C flag, fast route */
500 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
501 /* C flag, slow route */
502 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
503 /* table for calculate_cond */
504 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
505 /* total entry counts for calc_all, calc_c, calc_cond. */
506 static UInt n_calc_all = 0;
507 static UInt n_calc_c = 0;
508 static UInt n_calc_cond = 0;
509
510 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
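/* i.e. dump the counters every 2^22 (roughly 4.2 million) calls. */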
511
512
513 static void showCounts ( void )
514 {
515 Int op, co;
516 Char ch;
517 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
518 n_calc_all, n_calc_cond, n_calc_c);
519
520 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
521 " S NS P NP L NL LE NLE\n");
522 vex_printf(" -----------------------------------------------------"
523 "----------------------------------------\n");
524 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
525
526 ch = ' ';
527 if (op > 0 && (op-1) % 4 == 0)
528 ch = 'B';
529 if (op > 0 && (op-1) % 4 == 1)
530 ch = 'W';
531 if (op > 0 && (op-1) % 4 == 2)
532 ch = 'L';
533 if (op > 0 && (op-1) % 4 == 3)
534 ch = 'Q';
535
536 vex_printf("%2d%c: ", op, ch);
537 vex_printf("%6u ", tabc_slow[op]);
538 vex_printf("%6u ", tabc_fast[op]);
539 for (co = 0; co < 16; co++) {
540 Int n = tab_cond[op][co];
541 if (n >= 1000) {
542 vex_printf(" %3dK", n / 1000);
543 } else
544 if (n >= 0) {
545 vex_printf(" %3d ", n );
546 } else {
547 vex_printf(" ");
548 }
549 }
550 vex_printf("\n");
551 }
552 vex_printf("\n");
553 }
554
555 static void initCounts ( void )
556 {
557 Int op, co;
558 initted = True;
559 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
560 tabc_fast[op] = tabc_slow[op] = 0;
561 for (co = 0; co < 16; co++)
562 tab_cond[op][co] = 0;
563 }
564 }
565
566 #endif /* PROFILE_RFLAGS */
567
568
569 /* Calculate all 6 flags from the supplied thunk parameters.
570    Worker function: it is not called directly from generated code,
571    but is wrapped by the clean helpers below. */
572 static
573 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
574 ULong cc_dep1_formal,
575 ULong cc_dep2_formal,
576 ULong cc_ndep_formal )
577 {
578 switch (cc_op) {
579 case AMD64G_CC_OP_COPY:
580 return cc_dep1_formal
581 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
582 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
583
584 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
585 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
586 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
587 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
588
589 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
590 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
591 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
592 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
593
594 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
595 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
596 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
597 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
598
599 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
600 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
601 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
602 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
603
604 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
605 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
606 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
607 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
608
609 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
610 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
611 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
612 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
613
614 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
615 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
616 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
617 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
618
619 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
620 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
621 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
622 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
623
624 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
625 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
626 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
627 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
628
629 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
630 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
631 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
632 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
633
634 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
635 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
636 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
637 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
638
639 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
640 UShort, toUShort );
641 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
642 UInt, toUInt );
643 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
644 ULong, idULong );
645
646 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
647
648 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
649 Short, toUShort );
650 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
651 Int, toUInt );
652 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
653 Long, idULong );
654
655 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
656
657 default:
658 /* shouldn't really make these calls from generated code */
659 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
660 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
661 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
662 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
663 }
664 }
665
666
667 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
668 /* Calculate all the 6 flags from the supplied thunk parameters. */
669 ULong amd64g_calculate_rflags_all ( ULong cc_op,
670 ULong cc_dep1,
671 ULong cc_dep2,
672 ULong cc_ndep )
673 {
674 # if PROFILE_RFLAGS
675 if (!initted) initCounts();
676 n_calc_all++;
677 if (SHOW_COUNTS_NOW) showCounts();
678 # endif
679 return
680 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
681 }
682
683
684 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
685 /* Calculate just the carry flag from the supplied thunk parameters. */
686 ULong amd64g_calculate_rflags_c ( ULong cc_op,
687 ULong cc_dep1,
688 ULong cc_dep2,
689 ULong cc_ndep )
690 {
691 # if PROFILE_RFLAGS
692 if (!initted) initCounts();
693 n_calc_c++;
694 tabc_fast[cc_op]++;
695 if (SHOW_COUNTS_NOW) showCounts();
696 # endif
697
698 /* Fast-case some common ones. */
699 switch (cc_op) {
700 case AMD64G_CC_OP_COPY:
701 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
702 case AMD64G_CC_OP_LOGICQ:
703 case AMD64G_CC_OP_LOGICL:
704 case AMD64G_CC_OP_LOGICW:
705 case AMD64G_CC_OP_LOGICB:
706 return 0;
707 // case AMD64G_CC_OP_SUBL:
708 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
709 // ? AMD64G_CC_MASK_C : 0;
710 // case AMD64G_CC_OP_SUBW:
711 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
712 // ? AMD64G_CC_MASK_C : 0;
713 // case AMD64G_CC_OP_SUBB:
714 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
715 // ? AMD64G_CC_MASK_C : 0;
716 // case AMD64G_CC_OP_INCL:
717 // case AMD64G_CC_OP_DECL:
718 // return cc_ndep & AMD64G_CC_MASK_C;
719 default:
720 break;
721 }
722
723 # if PROFILE_RFLAGS
724 tabc_fast[cc_op]--;
725 tabc_slow[cc_op]++;
726 # endif
727
728 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
729 & AMD64G_CC_MASK_C;
730 }
731
732
733 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
734 /* returns 1 or 0 */
735 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
736 ULong cc_op,
737 ULong cc_dep1,
738 ULong cc_dep2,
739 ULong cc_ndep )
740 {
741 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
742 cc_dep2, cc_ndep);
743 ULong of,sf,zf,cf,pf;
744 ULong inv = cond & 1;
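/* Condition codes come in complementary pairs (O/NO, B/NB, Z/NZ, ...)
   whose encodings differ only in bit 0, so each case below computes
   the base predicate and XORs it with 'inv' to obtain the negated
   variant for free. */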
745
746 # if PROFILE_RFLAGS
747 if (!initted) initCounts();
748 tab_cond[cc_op][cond]++;
749 n_calc_cond++;
750 if (SHOW_COUNTS_NOW) showCounts();
751 # endif
752
753 switch (cond) {
754 case AMD64CondNO:
755 case AMD64CondO: /* OF == 1 */
756 of = rflags >> AMD64G_CC_SHIFT_O;
757 return 1 & (inv ^ of);
758
759 case AMD64CondNZ:
760 case AMD64CondZ: /* ZF == 1 */
761 zf = rflags >> AMD64G_CC_SHIFT_Z;
762 return 1 & (inv ^ zf);
763
764 case AMD64CondNB:
765 case AMD64CondB: /* CF == 1 */
766 cf = rflags >> AMD64G_CC_SHIFT_C;
767 return 1 & (inv ^ cf);
768 break;
769
770 case AMD64CondNBE:
771 case AMD64CondBE: /* (CF or ZF) == 1 */
772 cf = rflags >> AMD64G_CC_SHIFT_C;
773 zf = rflags >> AMD64G_CC_SHIFT_Z;
774 return 1 & (inv ^ (cf | zf));
775 break;
776
777 case AMD64CondNS:
778 case AMD64CondS: /* SF == 1 */
779 sf = rflags >> AMD64G_CC_SHIFT_S;
780 return 1 & (inv ^ sf);
781
782 case AMD64CondNP:
783 case AMD64CondP: /* PF == 1 */
784 pf = rflags >> AMD64G_CC_SHIFT_P;
785 return 1 & (inv ^ pf);
786
787 case AMD64CondNL:
788 case AMD64CondL: /* (SF xor OF) == 1 */
789 sf = rflags >> AMD64G_CC_SHIFT_S;
790 of = rflags >> AMD64G_CC_SHIFT_O;
791 return 1 & (inv ^ (sf ^ of));
792 break;
793
794 case AMD64CondNLE:
795 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
796 sf = rflags >> AMD64G_CC_SHIFT_S;
797 of = rflags >> AMD64G_CC_SHIFT_O;
798 zf = rflags >> AMD64G_CC_SHIFT_Z;
799 return 1 & (inv ^ ((sf ^ of) | zf));
800 break;
801
802 default:
803 /* shouldn't really make these calls from generated code */
804 vex_printf("amd64g_calculate_condition"
805 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
806 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
807 vpanic("amd64g_calculate_condition");
808 }
809 }
810
811
812 /* VISIBLE TO LIBVEX CLIENT */
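/* Reconstruct a complete %rflags value: the lazily-computed OSZACP
   bits come from the thunk, and DF (bit 10), AC (bit 18) and ID
   (bit 21) are merged back in from their separate guest-state
   fields. */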
813 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
814 {
815 ULong rflags = amd64g_calculate_rflags_all_WRK(
816 vex_state->guest_CC_OP,
817 vex_state->guest_CC_DEP1,
818 vex_state->guest_CC_DEP2,
819 vex_state->guest_CC_NDEP
820 );
821 Long dflag = vex_state->guest_DFLAG;
822 vassert(dflag == 1 || dflag == -1);
823 if (dflag == -1)
824 rflags |= (1<<10);
825 if (vex_state->guest_IDFLAG == 1)
826 rflags |= (1<<21);
827 if (vex_state->guest_ACFLAG == 1)
828 rflags |= (1<<18);
829
830 return rflags;
831 }
832
833 /* VISIBLE TO LIBVEX CLIENT */
834 void
835 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
836 /*MOD*/VexGuestAMD64State* vex_state )
837 {
838 ULong oszacp = amd64g_calculate_rflags_all_WRK(
839 vex_state->guest_CC_OP,
840 vex_state->guest_CC_DEP1,
841 vex_state->guest_CC_DEP2,
842 vex_state->guest_CC_NDEP
843 );
844 if (new_carry_flag & 1) {
845 oszacp |= AMD64G_CC_MASK_C;
846 } else {
847 oszacp &= ~AMD64G_CC_MASK_C;
848 }
849 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
850 vex_state->guest_CC_DEP1 = oszacp;
851 vex_state->guest_CC_DEP2 = 0;
852 vex_state->guest_CC_NDEP = 0;
853 }
854
855
856 /*---------------------------------------------------------------*/
857 /*--- %rflags translation-time function specialisers. ---*/
858 /*--- These help iropt specialise calls the above run-time ---*/
859 /*--- %rflags functions. ---*/
860 /*---------------------------------------------------------------*/
861
862 /* Used by the optimiser to try specialisations. Returns an
863 equivalent expression, or NULL if none. */
864
865 static Bool isU64 ( IRExpr* e, ULong n )
866 {
867 return toBool( e->tag == Iex_Const
868 && e->Iex.Const.con->tag == Ico_U64
869 && e->Iex.Const.con->Ico.U64 == n );
870 }
871
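/* For example, a guest "cmpq ; je" pair reaches the optimiser as a
   call to amd64g_calculate_condition with cc_op == AMD64G_CC_OP_SUBQ
   and cond == AMD64CondZ; the code below rewrites that call into the
   much cheaper 1Uto64(CmpEQ64(cc_dep1, cc_dep2)). */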
872 IRExpr* guest_amd64_spechelper ( HChar* function_name,
873 IRExpr** args,
874 IRStmt** precedingStmts,
875 Int n_precedingStmts )
876 {
877 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
878 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
879 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
880 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
881 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
882
883 Int i, arity = 0;
884 for (i = 0; args[i]; i++)
885 arity++;
886 # if 0
887 vex_printf("spec request:\n");
888 vex_printf(" %s ", function_name);
889 for (i = 0; i < arity; i++) {
890 vex_printf(" ");
891 ppIRExpr(args[i]);
892 }
893 vex_printf("\n");
894 # endif
895
896 /* --------- specialising "amd64g_calculate_condition" --------- */
897
898 if (vex_streq(function_name, "amd64g_calculate_condition")) {
899 /* specialise calls to above "calculate condition" function */
900 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
901 vassert(arity == 5);
902 cond = args[0];
903 cc_op = args[1];
904 cc_dep1 = args[2];
905 cc_dep2 = args[3];
906
907 /*---------------- ADDQ ----------------*/
908
909 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
910 /* long long add, then Z --> test (dst+src == 0) */
911 return unop(Iop_1Uto64,
912 binop(Iop_CmpEQ64,
913 binop(Iop_Add64, cc_dep1, cc_dep2),
914 mkU64(0)));
915 }
916
917 /*---------------- SUBQ ----------------*/
918
919 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
920 /* long long sub/cmp, then Z --> test dst==src */
921 return unop(Iop_1Uto64,
922 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
923 }
924 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
925 /* long long sub/cmp, then NZ --> test dst!=src */
926 return unop(Iop_1Uto64,
927 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
928 }
929
930 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
931 /* long long sub/cmp, then L (signed less than)
932 --> test dst <s src */
933 return unop(Iop_1Uto64,
934 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
935 }
936
937 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
938 /* long long sub/cmp, then B (unsigned less than)
939 --> test dst <u src */
940 return unop(Iop_1Uto64,
941 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
942 }
943 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
944 /* long long sub/cmp, then NB (unsigned greater than or equal)
945 --> test src <=u dst */
946 /* Note, args are opposite way round from the usual */
947 return unop(Iop_1Uto64,
948 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
949 }
950
951 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
952 /* long long sub/cmp, then BE (unsigned less than or equal)
953 --> test dst <=u src */
954 return unop(Iop_1Uto64,
955 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
956 }
957
958 /*---------------- SUBL ----------------*/
959
960 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
961 /* long sub/cmp, then Z --> test dst==src */
962 return unop(Iop_1Uto64,
963 binop(Iop_CmpEQ32,
964 unop(Iop_64to32, cc_dep1),
965 unop(Iop_64to32, cc_dep2)));
966 }
967 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
968 /* long sub/cmp, then NZ --> test dst!=src */
969 return unop(Iop_1Uto64,
970 binop(Iop_CmpNE32,
971 unop(Iop_64to32, cc_dep1),
972 unop(Iop_64to32, cc_dep2)));
973 }
974
975 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
976 /* long sub/cmp, then L (signed less than)
977 --> test dst <s src */
978 return unop(Iop_1Uto64,
979 binop(Iop_CmpLT32S,
980 unop(Iop_64to32, cc_dep1),
981 unop(Iop_64to32, cc_dep2)));
982 }
983
984 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
985 /* long sub/cmp, then LE (signed less than or equal)
986 --> test dst <=s src */
987 return unop(Iop_1Uto64,
988 binop(Iop_CmpLE32S,
989 unop(Iop_64to32, cc_dep1),
990 unop(Iop_64to32, cc_dep2)));
991
992 }
993 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
994 /* long sub/cmp, then NLE (signed greater than)
995 --> test !(dst <=s src)
996 --> test (dst >s src)
997 --> test (src <s dst) */
998 return unop(Iop_1Uto64,
999 binop(Iop_CmpLT32S,
1000 unop(Iop_64to32, cc_dep2),
1001 unop(Iop_64to32, cc_dep1)));
1002
1003 }
1004
1005 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1006 /* long sub/cmp, then BE (unsigned less than or equal)
1007 --> test dst <=u src */
1008 return unop(Iop_1Uto64,
1009 binop(Iop_CmpLE32U,
1010 unop(Iop_64to32, cc_dep1),
1011 unop(Iop_64to32, cc_dep2)));
1012 }
1013 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1014 /* long sub/cmp, then NBE (unsigned greater than)
1015 --> test src <u dst */
1016 /* Note, args are opposite way round from the usual */
1017 return unop(Iop_1Uto64,
1018 binop(Iop_CmpLT32U,
1019 unop(Iop_64to32, cc_dep2),
1020 unop(Iop_64to32, cc_dep1)));
1021 }
1022
1023 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1024 /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
1025 return unop(Iop_1Uto64,
1026 binop(Iop_CmpLT32S,
1027 binop(Iop_Sub32,
1028 unop(Iop_64to32, cc_dep1),
1029 unop(Iop_64to32, cc_dep2)),
1030 mkU32(0)));
1031 }
1032
1033 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1034 /* long sub/cmp, then B (unsigned less than)
1035 --> test dst <u src */
1036 return unop(Iop_1Uto64,
1037 binop(Iop_CmpLT32U,
1038 unop(Iop_64to32, cc_dep1),
1039 unop(Iop_64to32, cc_dep2)));
1040 }
1041
1042 /*---------------- SUBW ----------------*/
1043
1044 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1045 /* word sub/cmp, then Z --> test dst==src */
1046 return unop(Iop_1Uto64,
1047 binop(Iop_CmpEQ16,
1048 unop(Iop_64to16,cc_dep1),
1049 unop(Iop_64to16,cc_dep2)));
1050 }
1051 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1052 /* word sub/cmp, then NZ --> test dst!=src */
1053 return unop(Iop_1Uto64,
1054 binop(Iop_CmpNE16,
1055 unop(Iop_64to16,cc_dep1),
1056 unop(Iop_64to16,cc_dep2)));
1057 }
1058
1059 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1060 /* word sub/cmp, then LE (signed less than or equal)
1061 --> test dst <=s src */
1062 return unop(Iop_1Uto64,
1063 binop(Iop_CmpLE64S,
1064 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1065 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1066
1067 }
1068
1069 /*---------------- SUBB ----------------*/
1070
1071 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1072 /* byte sub/cmp, then Z --> test dst==src */
1073 return unop(Iop_1Uto64,
1074 binop(Iop_CmpEQ8,
1075 unop(Iop_64to8,cc_dep1),
1076 unop(Iop_64to8,cc_dep2)));
1077 }
1078 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1079 /* byte sub/cmp, then NZ --> test dst!=src */
1080 return unop(Iop_1Uto64,
1081 binop(Iop_CmpNE8,
1082 unop(Iop_64to8,cc_dep1),
1083 unop(Iop_64to8,cc_dep2)));
1084 }
1085
1086 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1087 /* byte sub/cmp, then BE (unsigned less than or equal)
1088 --> test dst <=u src */
1089 return unop(Iop_1Uto64,
1090 binop(Iop_CmpLE64U,
1091 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1092 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1093 }
1094
1095 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1096 && isU64(cc_dep2, 0)) {
1097 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1098 --> test dst <s 0
1099 --> (ULong)dst[7]
1100 This is yet another scheme by which gcc figures out if the
1101 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1102 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1103 for an 8-bit comparison, since the args to the helper
1104 function are always U64s. */
1105 return binop(Iop_And64,
1106 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1107 mkU64(1));
1108 }
1109 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1110 && isU64(cc_dep2, 0)) {
1111 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1112 --> test !(dst <s 0)
1113 --> (ULong) !dst[7]
1114 */
1115 return binop(Iop_Xor64,
1116 binop(Iop_And64,
1117 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1118 mkU64(1)),
1119 mkU64(1));
1120 }
1121
1122 /*---------------- LOGICQ ----------------*/
1123
1124 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1125 /* long long and/or/xor, then Z --> test dst==0 */
1126 return unop(Iop_1Uto64,
1127 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1128 }
1129 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1130 /* long long and/or/xor, then NZ --> test dst!=0 */
1131 return unop(Iop_1Uto64,
1132 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1133 }
1134
1135 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1136 /* long long and/or/xor, then L
1137 LOGIC sets SF and ZF according to the
1138 result and makes OF be zero. L computes SF ^ OF, but
1139 OF is zero, so this reduces to SF -- which will be 1 iff
1140 the result is < signed 0. Hence ...
1141 */
1142 return unop(Iop_1Uto64,
1143 binop(Iop_CmpLT64S,
1144 cc_dep1,
1145 mkU64(0)));
1146 }
1147
1148 /*---------------- LOGICL ----------------*/
1149
1150 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1151 /* long and/or/xor, then Z --> test dst==0 */
1152 return unop(Iop_1Uto64,
1153 binop(Iop_CmpEQ32,
1154 unop(Iop_64to32, cc_dep1),
1155 mkU32(0)));
1156 }
1157 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1158 /* long and/or/xor, then NZ --> test dst!=0 */
1159 return unop(Iop_1Uto64,
1160 binop(Iop_CmpNE32,
1161 unop(Iop_64to32, cc_dep1),
1162 mkU32(0)));
1163 }
1164
1165 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1166 /* long and/or/xor, then LE
1167 This is pretty subtle. LOGIC sets SF and ZF according to the
1168 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1169 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1170 the result is <=signed 0. Hence ...
1171 */
1172 return unop(Iop_1Uto64,
1173 binop(Iop_CmpLE32S,
1174 unop(Iop_64to32, cc_dep1),
1175 mkU32(0)));
1176 }
1177
1178 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1179 /* long and/or/xor, then S --> (ULong)result[31] */
1180 return binop(Iop_And64,
1181 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1182 mkU64(1));
1183 }
1184 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1185 /* long and/or/xor, then NS --> (ULong) !result[31] */
1186 return binop(Iop_Xor64,
1187 binop(Iop_And64,
1188 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1189 mkU64(1)),
1190 mkU64(1));
1191 }
1192
1193 /*---------------- LOGICB ----------------*/
1194
1195 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1196 /* byte and/or/xor, then Z --> test dst==0 */
1197 return unop(Iop_1Uto64,
1198 binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1199 mkU64(0)));
1200 }
1201 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1202 /* byte and/or/xor, then NZ --> test dst!=0 */
1203 return unop(Iop_1Uto64,
1204 binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1205 mkU64(0)));
1206 }
1207
1208 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1209 /* this is an idiom gcc sometimes uses to find out if the top
1210 bit of a byte register is set: eg testb %al,%al; js ..
1211 Since it just depends on the top bit of the byte, extract
1212 that bit and explicitly get rid of all the rest. This
1213 helps memcheck avoid false positives in the case where any
1214 of the other bits in the byte are undefined. */
1215 /* byte and/or/xor, then S --> (UInt)result[7] */
1216 return binop(Iop_And64,
1217 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1218 mkU64(1));
1219 }
1220 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1221 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1222 return binop(Iop_Xor64,
1223 binop(Iop_And64,
1224 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1225 mkU64(1)),
1226 mkU64(1));
1227 }
1228
1229 /*---------------- INCB ----------------*/
1230
1231 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1232 /* 8-bit inc, then LE --> sign bit of the arg */
1233 return binop(Iop_And64,
1234 binop(Iop_Shr64,
1235 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1236 mkU8(7)),
1237 mkU64(1));
1238 }
1239
1240 /*---------------- INCW ----------------*/
1241
1242 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1243 /* 16-bit inc, then Z --> test dst == 0 */
1244 return unop(Iop_1Uto64,
1245 binop(Iop_CmpEQ64,
1246 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1247 mkU64(0)));
1248 }
1249
1250 /*---------------- DECL ----------------*/
1251
1252 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1253 /* dec L, then Z --> test dst == 0 */
1254 return unop(Iop_1Uto64,
1255 binop(Iop_CmpEQ32,
1256 unop(Iop_64to32, cc_dep1),
1257 mkU32(0)));
1258 }
1259
1260 /*---------------- DECW ----------------*/
1261
1262 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1263 /* 16-bit dec, then NZ --> test dst != 0 */
1264 return unop(Iop_1Uto64,
1265 binop(Iop_CmpNE64,
1266 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1267 mkU64(0)));
1268 }
1269
1270 /*---------------- COPY ----------------*/
1271 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1272 jbe" for example. */
1273
1274 if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1275 (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1276 /* COPY, then BE --> extract C and Z from dep1, and test (C
1277 or Z == 1). */
1278 /* COPY, then NBE --> extract C and Z from dep1, and test (C
1279 or Z == 0). */
1280 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1281 return
1282 unop(
1283 Iop_1Uto64,
1284 binop(
1285 Iop_CmpEQ64,
1286 binop(
1287 Iop_And64,
1288 binop(
1289 Iop_Or64,
1290 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1291 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1292 ),
1293 mkU64(1)
1294 ),
1295 mkU64(nnn)
1296 )
1297 );
1298 }
1299
1300 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1301 /* COPY, then B --> extract C dep1, and test (C == 1). */
1302 return
1303 unop(
1304 Iop_1Uto64,
1305 binop(
1306 Iop_CmpNE64,
1307 binop(
1308 Iop_And64,
1309 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1310 mkU64(1)
1311 ),
1312 mkU64(0)
1313 )
1314 );
1315 }
1316
1317 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1318 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1319 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1320 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1321 UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1322 return
1323 unop(
1324 Iop_1Uto64,
1325 binop(
1326 Iop_CmpEQ64,
1327 binop(
1328 Iop_And64,
1329 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1330 mkU64(1)
1331 ),
1332 mkU64(nnn)
1333 )
1334 );
1335 }
1336
1337 if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1338 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1339 return
1340 unop(
1341 Iop_1Uto64,
1342 binop(
1343 Iop_CmpNE64,
1344 binop(
1345 Iop_And64,
1346 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1347 mkU64(1)
1348 ),
1349 mkU64(0)
1350 )
1351 );
1352 }
1353
1354 return NULL;
1355 }
1356
1357 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1358
1359 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1360 /* specialise calls to above "calculate_rflags_c" function */
1361 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1362 vassert(arity == 4);
1363 cc_op = args[0];
1364 cc_dep1 = args[1];
1365 cc_dep2 = args[2];
1366 cc_ndep = args[3];
1367
1368 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1369 /* C after sub denotes unsigned less than */
1370 return unop(Iop_1Uto64,
1371 binop(Iop_CmpLT64U,
1372 cc_dep1,
1373 cc_dep2));
1374 }
1375 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1376 /* C after sub denotes unsigned less than */
1377 return unop(Iop_1Uto64,
1378 binop(Iop_CmpLT32U,
1379 unop(Iop_64to32, cc_dep1),
1380 unop(Iop_64to32, cc_dep2)));
1381 }
1382 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1383 /* C after sub denotes unsigned less than */
1384 return unop(Iop_1Uto64,
1385 binop(Iop_CmpLT64U,
1386 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1387 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1388 }
1389 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1390 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1391 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1392 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1393 /* cflag after logic is zero */
1394 return mkU64(0);
1395 }
1396 if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1397 || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1398 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1399 return cc_ndep;
1400 }
1401
1402 # if 0
1403 if (cc_op->tag == Iex_Const) {
1404 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1405 }
1406 # endif
1407
1408 return NULL;
1409 }
1410
1411 # undef unop
1412 # undef binop
1413 # undef mkU64
1414 # undef mkU32
1415 # undef mkU8
1416
1417 return NULL;
1418 }
1419
1420
1421 /*---------------------------------------------------------------*/
1422 /*--- Supporting functions for x87 FPU activities. ---*/
1423 /*---------------------------------------------------------------*/
1424
1425 static inline Bool host_is_little_endian ( void )
1426 {
1427 UInt x = 0x76543210;
1428 UChar* p = (UChar*)(&x);
1429 return toBool(*p == 0x10);
1430 }
1431
1432 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1433 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
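/* The classification is returned as C3,C2,C1,C0 already shifted into
   the positions given by AMD64G_FC_SHIFT_*, with C1 holding the sign
   of the value. */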
1434 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1435 {
1436 Bool mantissaIsZero;
1437 Int bexp;
1438 UChar sign;
1439 UChar* f64;
1440
1441 vassert(host_is_little_endian());
1442
1443 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1444
1445 f64 = (UChar*)(&dbl);
1446 sign = toUChar( (f64[7] >> 7) & 1 );
1447
1448 /* First off, if the tag indicates the register was empty,
1449 return 1,0,sign,1 */
1450 if (tag == 0) {
1451 /* vex_printf("Empty\n"); */
1452 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1453 | AMD64G_FC_MASK_C0;
1454 }
1455
1456 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1457 bexp &= 0x7FF;
1458
1459 mantissaIsZero
1460 = toBool(
1461 (f64[6] & 0x0F) == 0
1462 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1463 );
1464
1465 /* If both exponent and mantissa are zero, the value is zero.
1466 Return 1,0,sign,0. */
1467 if (bexp == 0 && mantissaIsZero) {
1468 /* vex_printf("Zero\n"); */
1469 return AMD64G_FC_MASK_C3 | 0
1470 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1471 }
1472
1473 /* If exponent is zero but mantissa isn't, it's a denormal.
1474 Return 1,1,sign,0. */
1475 if (bexp == 0 && !mantissaIsZero) {
1476 /* vex_printf("Denormal\n"); */
1477 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1478 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1479 }
1480
1481 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1482 Return 0,1,sign,1. */
1483 if (bexp == 0x7FF && mantissaIsZero) {
1484 /* vex_printf("Inf\n"); */
1485 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1486 | AMD64G_FC_MASK_C0;
1487 }
1488
1489 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1490 Return 0,0,sign,1. */
1491 if (bexp == 0x7FF && !mantissaIsZero) {
1492 /* vex_printf("NaN\n"); */
1493 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1494 }
1495
1496 /* Uh, ok, we give up. It must be a normal finite number.
1497 Return 0,1,sign,0.
1498 */
1499 /* vex_printf("normal\n"); */
1500 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1501 }
1502
1503
1504 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1505 appears to differ from the former only in that the 8 FP registers
1506 themselves are not transferred into the guest state. */
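/* The x87_state image uses the Fpu_State layout from
   guest_generic_x87.h: 14 16-bit environment words (control, status
   and tag words, etc.) followed by the 8 stack registers as packed
   10-byte (80-bit) little-endian values, in ST order. */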
1507 static
1508 VexEmWarn do_put_x87 ( Bool moveRegs,
1509 /*IN*/UChar* x87_state,
1510 /*OUT*/VexGuestAMD64State* vex_state )
1511 {
1512 Int stno, preg;
1513 UInt tag;
1514 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1515 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1516 Fpu_State* x87 = (Fpu_State*)x87_state;
1517 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
1518 UInt tagw = x87->env[FP_ENV_TAG];
1519 UInt fpucw = x87->env[FP_ENV_CTRL];
1520 UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
1521 VexEmWarn ew;
1522 UInt fpround;
1523 ULong pair;
1524
1525 /* Copy registers and tags */
1526 for (stno = 0; stno < 8; stno++) {
1527 preg = (stno + ftop) & 7;
1528 tag = (tagw >> (2*preg)) & 3;
1529 if (tag == 3) {
1530 /* register is empty */
1531 /* hmm, if it's empty, does it still get written? Probably
1532 safer to say it does. If we don't, memcheck could get out
1533 of sync, in that it thinks all FP registers are defined by
1534 this helper, but in reality some have not been updated. */
1535 if (moveRegs)
1536 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1537 vexTags[preg] = 0;
1538 } else {
1539 /* register is non-empty */
1540 if (moveRegs)
1541 convert_f80le_to_f64le( &x87->reg[10*stno],
1542 (UChar*)&vexRegs[preg] );
1543 vexTags[preg] = 1;
1544 }
1545 }
1546
1547 /* stack pointer */
1548 vex_state->guest_FTOP = ftop;
1549
1550 /* status word */
1551 vex_state->guest_FC3210 = c3210;
1552
1553 /* handle the control word, setting FPROUND and detecting any
1554 emulation warnings. */
1555 pair = amd64g_check_fldcw ( (ULong)fpucw );
1556 fpround = (UInt)pair;
1557 ew = (VexEmWarn)(pair >> 32);
1558
1559 vex_state->guest_FPROUND = fpround & 3;
1560
1561 /* emulation warnings --> caller */
1562 return ew;
1563 }
1564
1565
1566 /* Create an x87 FPU state from the guest state, as close as
1567 we can approximate it. */
1568 static
1569 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1570 /*OUT*/UChar* x87_state )
1571 {
1572 Int i, stno, preg;
1573 UInt tagw;
1574 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1575 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1576 Fpu_State* x87 = (Fpu_State*)x87_state;
1577 UInt ftop = vex_state->guest_FTOP;
1578 UInt c3210 = vex_state->guest_FC3210;
1579
1580 for (i = 0; i < 14; i++)
1581 x87->env[i] = 0;
1582
1583 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1584 x87->env[FP_ENV_STAT]
1585 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1586 x87->env[FP_ENV_CTRL]
1587 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1588
1589 /* Dump the register stack in ST order. */
1590 tagw = 0;
1591 for (stno = 0; stno < 8; stno++) {
1592 preg = (stno + ftop) & 7;
1593 if (vexTags[preg] == 0) {
1594 /* register is empty */
1595 tagw |= (3 << (2*preg));
1596 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1597 &x87->reg[10*stno] );
1598 } else {
1599 /* register is full. */
1600 tagw |= (0 << (2*preg));
1601 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1602 &x87->reg[10*stno] );
1603 }
1604 }
1605 x87->env[FP_ENV_TAG] = toUShort(tagw);
1606 }
1607
1608
1609 /* CALLED FROM GENERATED CODE */
1610 /* DIRTY HELPER (reads guest state, writes guest mem) */
1611 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
1612 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
1613 {
1614 /* Derived from values obtained from
1615 vendor_id : AuthenticAMD
1616 cpu family : 15
1617 model : 12
1618 model name : AMD Athlon(tm) 64 Processor 3200+
1619 stepping : 0
1620 cpu MHz : 2200.000
1621 cache size : 512 KB
1622 */
1623 /* Somewhat roundabout, but at least it's simple. */
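/* Image layout written here: bytes 0..31 hold the FCW/FSW/FTW/FOP/
   RIP/RDP/MXCSR header, bytes 32..159 hold the 8 FP registers (16
   bytes each, only 10 used), and bytes 160..415 hold %xmm0..%xmm15.
   The rest of the 512-byte FXSAVE area is left unwritten. */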
1624 Fpu_State tmp;
1625 UShort* addrS = (UShort*)addr;
1626 UChar* addrC = (UChar*)addr;
1627 U128* xmm = (U128*)(addr + 160);
1628 UInt mxcsr;
1629 UShort fp_tags;
1630 UInt summary_tags;
1631 Int r, stno;
1632 UShort *srcS, *dstS;
1633
1634 do_get_x87( gst, (UChar*)&tmp );
1635 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
1636
1637 /* Now build the proper fxsave image from the x87 image we just
1638 made. */
1639
1640 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1641 addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1642
1643 /* set addrS[2] in an endian-independent way */
1644 summary_tags = 0;
1645 fp_tags = tmp.env[FP_ENV_TAG];
1646 for (r = 0; r < 8; r++) {
1647 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1648 summary_tags |= (1 << r);
1649 }
1650 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1651 addrC[5] = 0; /* pad */
1652
1653 /* FOP: faulting fpu opcode. From experimentation, the real CPU
1654 does not write this field. (?!) */
1655 addrS[3] = 0; /* BOGUS */
1656
1657 /* RIP (Last x87 instruction pointer). From experimentation, the
1658 real CPU does not write this field. (?!) */
1659 addrS[4] = 0; /* BOGUS */
1660 addrS[5] = 0; /* BOGUS */
1661 addrS[6] = 0; /* BOGUS */
1662 addrS[7] = 0; /* BOGUS */
1663
1664 /* RDP (Last x87 data pointer). From experimentation, the real CPU
1665 does not write this field. (?!) */
1666 addrS[8] = 0; /* BOGUS */
1667 addrS[9] = 0; /* BOGUS */
1668 addrS[10] = 0; /* BOGUS */
1669 addrS[11] = 0; /* BOGUS */
1670
1671 addrS[12] = toUShort(mxcsr); /* MXCSR */
1672 addrS[13] = toUShort(mxcsr >> 16);
1673
1674 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
1675 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
1676
1677 /* Copy in the FP registers, in ST order. */
1678 for (stno = 0; stno < 8; stno++) {
1679 srcS = (UShort*)(&tmp.reg[10*stno]);
1680 dstS = (UShort*)(&addrS[16 + 8*stno]);
1681 dstS[0] = srcS[0];
1682 dstS[1] = srcS[1];
1683 dstS[2] = srcS[2];
1684 dstS[3] = srcS[3];
1685 dstS[4] = srcS[4];
1686 dstS[5] = 0;
1687 dstS[6] = 0;
1688 dstS[7] = 0;
1689 }
1690
1691 /* That's the first 160 bytes of the image done. Now only %xmm0
1692 .. %xmm15 remain to be copied. If the host is big-endian, these
1693 need to be byte-swapped. */
1694 vassert(host_is_little_endian());
1695
1696 # define COPY_U128(_dst,_src) \
1697 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1698 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1699 while (0)
1700
1701 COPY_U128( xmm[0], gst->guest_XMM0 );
1702 COPY_U128( xmm[1], gst->guest_XMM1 );
1703 COPY_U128( xmm[2], gst->guest_XMM2 );
1704 COPY_U128( xmm[3], gst->guest_XMM3 );
1705 COPY_U128( xmm[4], gst->guest_XMM4 );
1706 COPY_U128( xmm[5], gst->guest_XMM5 );
1707 COPY_U128( xmm[6], gst->guest_XMM6 );
1708 COPY_U128( xmm[7], gst->guest_XMM7 );
1709 COPY_U128( xmm[8], gst->guest_XMM8 );
1710 COPY_U128( xmm[9], gst->guest_XMM9 );
1711 COPY_U128( xmm[10], gst->guest_XMM10 );
1712 COPY_U128( xmm[11], gst->guest_XMM11 );
1713 COPY_U128( xmm[12], gst->guest_XMM12 );
1714 COPY_U128( xmm[13], gst->guest_XMM13 );
1715 COPY_U128( xmm[14], gst->guest_XMM14 );
1716 COPY_U128( xmm[15], gst->guest_XMM15 );
1717
1718 # undef COPY_U128
1719 }
1720
1721
1722 /* CALLED FROM GENERATED CODE */
1723 /* DIRTY HELPER (writes guest state, reads guest mem) */
1724 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
1725 {
1726 Fpu_State tmp;
1727 VexEmWarn warnX87 = EmWarn_NONE;
1728 VexEmWarn warnXMM = EmWarn_NONE;
1729 UShort* addrS = (UShort*)addr;
1730 UChar* addrC = (UChar*)addr;
1731 U128* xmm = (U128*)(addr + 160);
1732 UShort fp_tags;
1733 Int r, stno, i;
1734
1735 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
1736 to be byte-swapped. */
1737 vassert(host_is_little_endian());
1738
1739 # define COPY_U128(_dst,_src) \
1740 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1741 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1742 while (0)
1743
1744 COPY_U128( gst->guest_XMM0, xmm[0] );
1745 COPY_U128( gst->guest_XMM1, xmm[1] );
1746 COPY_U128( gst->guest_XMM2, xmm[2] );
1747 COPY_U128( gst->guest_XMM3, xmm[3] );
1748 COPY_U128( gst->guest_XMM4, xmm[4] );
1749 COPY_U128( gst->guest_XMM5, xmm[5] );
1750 COPY_U128( gst->guest_XMM6, xmm[6] );
1751 COPY_U128( gst->guest_XMM7, xmm[7] );
1752 COPY_U128( gst->guest_XMM8, xmm[8] );
1753 COPY_U128( gst->guest_XMM9, xmm[9] );
1754 COPY_U128( gst->guest_XMM10, xmm[10] );
1755 COPY_U128( gst->guest_XMM11, xmm[11] );
1756 COPY_U128( gst->guest_XMM12, xmm[12] );
1757 COPY_U128( gst->guest_XMM13, xmm[13] );
1758 COPY_U128( gst->guest_XMM14, xmm[14] );
1759 COPY_U128( gst->guest_XMM15, xmm[15] );
1760
1761 # undef COPY_U128
1762
1763 /* Copy the x87 registers out of the image, into a temporary
1764 Fpu_State struct. */
1765 for (i = 0; i < 14; i++) tmp.env[i] = 0;
1766 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1767 /* fill in tmp.reg[0..7] */
1768 for (stno = 0; stno < 8; stno++) {
1769 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1770 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1771 dstS[0] = srcS[0];
1772 dstS[1] = srcS[1];
1773 dstS[2] = srcS[2];
1774 dstS[3] = srcS[3];
1775 dstS[4] = srcS[4];
1776 }
1777 /* fill in tmp.env[0..13] */
1778 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1779    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1780
1781 fp_tags = 0;
1782 for (r = 0; r < 8; r++) {
1783 if (addrC[4] & (1<<r))
1784          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1785       else
1786          fp_tags |= (3 << (2*r)); /* EMPTY */
1787 }
1788 tmp.env[FP_ENV_TAG] = fp_tags;
1789
1790 /* Now write 'tmp' into the guest state. */
1791 warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1792
1793 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1794 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1795 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
1796
1797 warnXMM = (VexEmWarn)(w64 >> 32);
1798
1799 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
1800 }
1801
1802 /* Prefer an X87 emwarn over an XMM one, if both exist. */
1803 if (warnX87 != EmWarn_NONE)
1804 return warnX87;
1805 else
1806 return warnXMM;
1807 }
1808
1809
1810 /* DIRTY HELPER (writes guest state) */
1811 /* Initialise the x87 FPU state as per 'finit'. */
1812 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
1813 {
1814 Int i;
1815 gst->guest_FTOP = 0;
1816 for (i = 0; i < 8; i++) {
1817 gst->guest_FPTAG[i] = 0; /* empty */
1818 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1819 }
1820 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
1821 gst->guest_FC3210 = 0;
1822 }
1823
1824
1825 /* CALLED FROM GENERATED CODE */
1826 /* DIRTY HELPER (reads guest memory) */
1827 ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
1828 {
1829 ULong f64;
1830 convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
1831 return f64;
1832 }
1833
1834 /* CALLED FROM GENERATED CODE */
1835 /* DIRTY HELPER (writes guest memory) */
1836 void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
1837 {
1838 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
1839 }
1840
1841
1842 /* CALLED FROM GENERATED CODE */
1843 /* CLEAN HELPER */
1844 /* mxcsr[15:0] contains an SSE native format MXCSR value.
1845 Extract from it the required SSEROUND value and any resulting
1846 emulation warning, and return (warn << 32) | sseround value.
1847 */
1848 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
1849 {
1850 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
1851 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1852 ULong rmode = (mxcsr >> 13) & 3;
1853
1854 /* Detect any required emulation warnings. */
1855 VexEmWarn ew = EmWarn_NONE;
1856
1857 if ((mxcsr & 0x1F80) != 0x1F80) {
1858 /* unmasked exceptions! */
1859 ew = EmWarn_X86_sseExns;
1860 }
1861 else
1862 if (mxcsr & (1<<15)) {
1863 /* FZ is set */
1864 ew = EmWarn_X86_fz;
1865 }
1866 else
1867 if (mxcsr & (1<<6)) {
1868 /* DAZ is set */
1869 ew = EmWarn_X86_daz;
1870 }
1871
1872 return (((ULong)ew) << 32) | ((ULong)rmode);
1873 }
1874
1875
1876 /* CALLED FROM GENERATED CODE */
1877 /* CLEAN HELPER */
1878 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1879 native format MXCSR value. */
1880 ULong amd64g_create_mxcsr ( ULong sseround )
1881 {
1882 sseround &= 3;
1883 return 0x1F80 | (sseround << 13);
1884 }
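
/* Illustrative sanity check (hypothetical helper, not called from
   anywhere): packing a rounding mode with amd64g_create_mxcsr and
   re-extracting it with amd64g_check_ldmxcsr should round-trip with
   no emulation warning, since the created MXCSR has all exceptions
   masked and FZ/DAZ clear. */
static inline void illustrate_mxcsr_roundtrip ( void )
{
   ULong rm;
   for (rm = 0; rm < 4; rm++) {
      ULong pair = amd64g_check_ldmxcsr( amd64g_create_mxcsr(rm) );
      vassert((pair & 0xFFFFFFFFULL) == rm);            /* rounding mode */
      vassert((VexEmWarn)(pair >> 32) == EmWarn_NONE);  /* no warning */
   }
}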
1885
1886
1887 /* CLEAN HELPER */
1888 /* fpucw[15:0] contains an x87 native format FPU control word.
1889 Extract from it the required FPROUND value and any resulting
1890 emulation warning, and return (warn << 32) | fpround value.
1891 */
1892 ULong amd64g_check_fldcw ( ULong fpucw )
1893 {
1894 /* Decide on a rounding mode. fpucw[11:10] holds it. */
1895 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1896 ULong rmode = (fpucw >> 10) & 3;
1897
1898 /* Detect any required emulation warnings. */
1899 VexEmWarn ew = EmWarn_NONE;
1900
1901 if ((fpucw & 0x3F) != 0x3F) {
1902 /* unmasked exceptions! */
1903 ew = EmWarn_X86_x87exns;
1904 }
1905 else
1906 if (((fpucw >> 8) & 3) != 3) {
1907 /* unsupported precision */
1908 ew = EmWarn_X86_x87precision;
1909 }
1910
1911 return (((ULong)ew) << 32) | ((ULong)rmode);
1912 }
1913
1914
1915 /* CLEAN HELPER */
1916 /* Given fpround as an IRRoundingMode value, create a suitable x87
1917 native format FPU control word. */
1918 ULong amd64g_create_fpucw ( ULong fpround )
1919 {
1920 fpround &= 3;
1921 return 0x037F | (fpround << 10);
1922 }
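
/* Illustrative example of the warning path in amd64g_check_fldcw
   (hypothetical helper, not called from anywhere): a control word with
   all exceptions masked but 24-bit precision selected (PC field ==
   00b) extracts rounding mode 0 together with an
   EmWarn_X86_x87precision warning in the top 32 bits. */
static inline void illustrate_fldcw_precision_warning ( void )
{
   ULong pair = amd64g_check_fldcw( 0x007FULL );
   vassert((VexEmWarn)(pair >> 32) == EmWarn_X86_x87precision);
   vassert((pair & 0xFFFFFFFFULL) == 0);  /* fpucw[11:10] == 00b */
}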
1923
1924
1925 /* This is used to implement 'fldenv'.
1926 Reads 28 bytes at x87_state[0 .. 27]. */
1927 /* CALLED FROM GENERATED CODE */
1928 /* DIRTY HELPER */
1929 VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
1930 /*IN*/HWord x87_state)
1931 {
1932 Int stno, preg;
1933 UInt tag;
1934 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1935 Fpu_State* x87 = (Fpu_State*)x87_state;
1936 UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
1937 UInt tagw = x87->env[FP_ENV_TAG];
1938 UInt fpucw = x87->env[FP_ENV_CTRL];
1939 ULong c3210 = x87->env[FP_ENV_STAT] & 0x4700;
1940 VexEmWarn ew;
1941 ULong fpround;
1942 ULong pair;
1943
1944 /* Copy tags */
1945 for (stno = 0; stno < 8; stno++) {
1946 preg = (stno + ftop) & 7;
1947 tag = (tagw >> (2*preg)) & 3;
1948 if (tag == 3) {
1949 /* register is empty */
1950 vexTags[preg] = 0;
1951 } else {
1952 /* register is non-empty */
1953 vexTags[preg] = 1;
1954 }
1955 }
1956
1957 /* stack pointer */
1958 vex_state->guest_FTOP = ftop;
1959
1960 /* status word */
1961 vex_state->guest_FC3210 = c3210;
1962
1963 /* handle the control word, setting FPROUND and detecting any
1964 emulation warnings. */
1965 pair = amd64g_check_fldcw ( (ULong)fpucw );
1966 fpround = pair & 0xFFFFFFFFULL;
1967 ew = (VexEmWarn)(pair >> 32);
1968
1969 vex_state->guest_FPROUND = fpround & 3;
1970
1971 /* emulation warnings --> caller */
1972 return ew;
1973 }
1974
1975
1976 /* CALLED FROM GENERATED CODE */
1977 /* DIRTY HELPER */
1978 /* Create an x87 FPU env from the guest state, as close as we can
1979 approximate it. Writes 28 bytes at x87_state[0..27]. */
1980 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
1981 /*OUT*/HWord x87_state )
1982 {
1983 Int i, stno, preg;
1984 UInt tagw;
1985 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1986 Fpu_State* x87 = (Fpu_State*)x87_state;
1987 UInt ftop = vex_state->guest_FTOP;
1988 ULong c3210 = vex_state->guest_FC3210;
1989
1990 for (i = 0; i < 14; i++)
1991 x87->env[i] = 0;
1992
1993 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1994 x87->env[FP_ENV_STAT]
1995 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
1996 x87->env[FP_ENV_CTRL]
1997 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
1998
1999 /* Compute the x87 tag word. */
2000 tagw = 0;
2001 for (stno = 0; stno < 8; stno++) {
2002 preg = (stno + ftop) & 7;
2003 if (vexTags[preg] == 0) {
2004 /* register is empty */
2005 tagw |= (3 << (2*preg));
2006 } else {
2007 /* register is full. */
2008 tagw |= (0 << (2*preg));
2009 }
2010 }
2011 x87->env[FP_ENV_TAG] = toUShort(tagw);
2012
2013    /* We don't dump the x87 registers, though. */
2014 }
2015
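/* Tag-word sketch (hypothetical helper, not called from anywhere):
   the hardware tag word packs two bits per physical register, with
   11b meaning "empty", while VEX keeps one byte per register with 0
   meaning empty.  With FTOP == 0 and only %st(0) in use, the loop in
   FSTENV above therefore produces a tag word of 0xFFFC. */
static inline void illustrate_x87_tag_word ( void )
{
   UInt tagw = 0, preg;
   for (preg = 1; preg < 8; preg++)
      tagw |= (3 << (2*preg));   /* registers 1..7 empty */
   /* register 0 keeps tag 00b, i.e. in use */
   vassert(tagw == 0xFFFC);
}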
2016
2017 /*---------------------------------------------------------------*/
2018 /*--- Misc integer helpers, including rotates and CPUID. ---*/
2019 /*---------------------------------------------------------------*/
2020
2021 /* Claim to be the following CPU, which is probably representative of
2022 the lowliest (earliest) amd64 offerings. It can do neither sse3
2023 nor cx16.
2024
2025 vendor_id : AuthenticAMD
2026 cpu family : 15
2027 model : 5
2028 model name : AMD Opteron (tm) Processor 848
2029 stepping : 10
2030 cpu MHz : 1797.682
2031 cache size : 1024 KB
2032 fpu : yes
2033 fpu_exception : yes
2034 cpuid level : 1
2035 wp : yes
2036 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2037 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2038 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2039 bogomips : 3600.62
2040 TLB size : 1088 4K pages
2041 clflush size : 64
2042 cache_alignment : 64
2043 address sizes : 40 bits physical, 48 bits virtual
2044 power management: ts fid vid ttp
2045 */
2046 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2047 {
2048 # define SET_ABCD(_a,_b,_c,_d) \
2049 do { st->guest_RAX = (ULong)(_a); \
2050 st->guest_RBX = (ULong)(_b); \
2051 st->guest_RCX = (ULong)(_c); \
2052 st->guest_RDX = (ULong)(_d); \
2053 } while (0)
2054
2055 switch (0xFFFFFFFF & st->guest_RAX) {
2056 case 0x00000000:
2057 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2058 break;
2059 case 0x00000001:
2060 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2061 break;
2062 case 0x80000000:
2063 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2064 break;
2065 case 0x80000001:
2066 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
2067 break;
2068 case 0x80000002:
2069 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2070 break;
2071 case 0x80000003:
2072 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2073 break;
2074 case 0x80000004:
2075 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2076 break;
2077 case 0x80000005:
2078 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2079 break;
2080 case 0x80000006:
2081 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2082 break;
2083 case 0x80000007:
2084 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2085 break;
2086 case 0x80000008:
2087 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2088 break;
2089 default:
2090 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2091 break;
2092 }
2093 # undef SET_ABCD
2094 }
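
/* Illustrative decoding of the leaf-0 vendor id above (hypothetical
   helper, not called from anywhere).  The twelve characters are
   assembled from %ebx, %edx, %ecx in that order, four little-endian
   bytes per register, giving "AuthenticAMD". */
static inline void illustrate_cpuid_vendor_id ( void )
{
   UInt  regs[3] = { 0x68747541, 0x69746e65, 0x444d4163 }; /* ebx,edx,ecx */
   UChar str[13];
   Int   i;
   for (i = 0; i < 12; i++)
      str[i] = toUChar( (regs[i/4] >> (8*(i%4))) & 0xFF );
   str[12] = 0;
   vassert(str[0] == 'A' && str[8] == 'c' && str[11] == 'D');
}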
2095
2096
2097 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2098 capable.
2099
2100 vendor_id : GenuineIntel
2101 cpu family : 6
2102 model : 15
2103 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2104 stepping : 6
2105 cpu MHz : 2394.000
2106 cache size : 4096 KB
2107 physical id : 0
2108 siblings : 2
2109 core id : 0
2110 cpu cores : 2
2111 fpu : yes
2112 fpu_exception : yes
2113 cpuid level : 10
2114 wp : yes
2115 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2116 mtrr pge mca cmov pat pse36 clflush dts acpi
2117 mmx fxsr sse sse2 ss ht tm syscall nx lm
2118 constant_tsc pni monitor ds_cpl vmx est tm2
2119 cx16 xtpr lahf_lm
2120 bogomips : 4798.78
2121 clflush size : 64
2122 cache_alignment : 64
2123 address sizes : 36 bits physical, 48 bits virtual
2124 power management:
2125 */
2126 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2127 {
2128 # define SET_ABCD(_a,_b,_c,_d) \
2129 do { st->guest_RAX = (ULong)(_a); \
2130 st->guest_RBX = (ULong)(_b); \
2131 st->guest_RCX = (ULong)(_c); \
2132 st->guest_RDX = (ULong)(_d); \
2133 } while (0)
2134
2135 switch (0xFFFFFFFF & st->guest_RAX) {
2136 case 0x00000000:
2137 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2138 break;
2139 case 0x00000001:
2140 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2141 break;
2142 case 0x00000002:
2143 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2144 break;
2145 case 0x00000003:
2146 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2147 break;
2148 case 0x00000004: {
2149 switch (0xFFFFFFFF & st->guest_RCX) {
2150 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2151 0x0000003f, 0x00000001); break;
2152 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2153 0x0000003f, 0x00000001); break;
2154 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2155 0x00000fff, 0x00000001); break;
2156 default: SET_ABCD(0x00000000, 0x00000000,
2157 0x00000000, 0x00000000); break;
2158 }
2159 break;
2160 }
2161 case 0x00000005:
2162 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2163 break;
2164 case 0x00000006:
2165 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2166 break;
2167 case 0x00000007:
2168 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2169 break;
2170 case 0x00000008:
2171 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2172 break;
2173 case 0x00000009:
2174 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2175 break;
2176 case 0x0000000a:
2177 unhandled_eax_value:
2178 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2179 break;
2180 case 0x80000000:
2181 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2182 break;
2183 case 0x80000001:
2184 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2185 break;
2186 case 0x80000002:
2187 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2188 break;
2189 case 0x80000003:
2190 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2191 break;
2192 case 0x80000004:
2193 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2194 break;
2195 case 0x80000005:
2196 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2197 break;
2198 case 0x80000006:
2199 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2200 break;
2201 case 0x80000007:
2202 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2203 break;
2204 case 0x80000008:
2205 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2206 break;
2207 default:
2208 goto unhandled_eax_value;
2209 }
2210 # undef SET_ABCD
2211 }
2212
2213
2214 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2215 capable.
2216
2217 vendor_id : GenuineIntel
2218 cpu family : 6
2219 model : 37
2220 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
2221 stepping : 2
2222 cpu MHz : 3334.000
2223 cache size : 4096 KB
2224 physical id : 0
2225 siblings : 4
2226 core id : 0
2227 cpu cores : 2
2228 apicid : 0
2229 initial apicid : 0
2230 fpu : yes
2231 fpu_exception : yes
2232 cpuid level : 11
2233 wp : yes
2234 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2235 mtrr pge mca cmov pat pse36 clflush dts acpi
2236 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2237 lm constant_tsc arch_perfmon pebs bts rep_good
2238 xtopology nonstop_tsc aperfmperf pni pclmulqdq
2239 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2240 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2241 arat tpr_shadow vnmi flexpriority ept vpid
2242 MINUS aes (see below)
2243 bogomips : 6957.57
2244 clflush size : 64
2245 cache_alignment : 64
2246 address sizes : 36 bits physical, 48 bits virtual
2247 power management:
2248 */
2249 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2250 {
2251 # define SET_ABCD(_a,_b,_c,_d) \
2252 do { st->guest_RAX = (ULong)(_a); \
2253 st->guest_RBX = (ULong)(_b); \
2254 st->guest_RCX = (ULong)(_c); \
2255 st->guest_RDX = (ULong)(_d); \
2256 } while (0)
2257
2258 UInt old_eax = (UInt)st->guest_RAX;
2259 UInt old_ecx = (UInt)st->guest_RCX;
2260
2261 switch (old_eax) {
2262 case 0x00000000:
2263 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2264 break;
2265 case 0x00000001:
2266 // & ~(1<<25): don't claim to support AES insns. See
2267 // bug 249991.
2268 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
2269 0xbfebfbff);
2270 break;
2271 case 0x00000002:
2272 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2273 break;
2274 case 0x00000003:
2275 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2276 break;
2277 case 0x00000004:
2278 switch (old_ecx) {
2279 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2280 0x0000003f, 0x00000000); break;
2281 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2282 0x0000007f, 0x00000000); break;
2283 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2284 0x000001ff, 0x00000000); break;
2285 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2286 0x00000fff, 0x00000002); break;
2287 default: SET_ABCD(0x00000000, 0x00000000,
2288 0x00000000, 0x00000000); break;
2289 }
2290 break;
2291 case 0x00000005:
2292 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2293 break;
2294 case 0x00000006:
2295 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2296 break;
2297 case 0x00000007:
2298 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2299 break;
2300 case 0x00000008:
2301 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2302 break;
2303 case 0x00000009:
2304 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2305 break;
2306 case 0x0000000a:
2307 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2308 break;
2309 case 0x0000000b:
2310 switch (old_ecx) {
2311 case 0x00000000:
2312 SET_ABCD(0x00000001, 0x00000002,
2313 0x00000100, 0x00000000); break;
2314 case 0x00000001:
2315 SET_ABCD(0x00000004, 0x00000004,
2316 0x00000201, 0x00000000); break;
2317 default:
2318 SET_ABCD(0x00000000, 0x00000000,
2319 old_ecx, 0x00000000); break;
2320 }
2321 break;
2322 case 0x0000000c:
2323 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2324 break;
2325 case 0x0000000d:
2326 switch (old_ecx) {
2327 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2328 0x00000100, 0x00000000); break;
2329 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2330 0x00000201, 0x00000000); break;
2331 default: SET_ABCD(0x00000000, 0x00000000,
2332 old_ecx, 0x00000000); break;
2333 }
2334 break;
2335 case 0x80000000:
2336 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2337 break;
2338 case 0x80000001:
2339 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2340 break;
2341 case 0x80000002:
2342 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2343 break;
2344 case 0x80000003:
2345 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2346 break;
2347 case 0x80000004:
2348 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2349 break;
2350 case 0x80000005:
2351 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2352 break;
2353 case 0x80000006:
2354 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2355 break;
2356 case 0x80000007:
2357 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2358 break;
2359 case 0x80000008:
2360 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2361 break;
2362 default:
2363 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2364 break;
2365 }
2366 # undef SET_ABCD
2367 }
2368
2369
2370 ULong amd64g_calculate_RCR ( ULong arg,
2371 ULong rot_amt,
2372 ULong rflags_in,
2373 Long szIN )
2374 {
2375 Bool wantRflags = toBool(szIN < 0);
2376 ULong sz = wantRflags ? (-szIN) : szIN;
2377 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2378 ULong cf=0, of=0, tempcf;
2379
2380 switch (sz) {
2381 case 8:
2382 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2383 of = ((arg >> 63) ^ cf) & 1;
2384 while (tempCOUNT > 0) {
2385 tempcf = arg & 1;
2386 arg = (arg >> 1) | (cf << 63);
2387 cf = tempcf;
2388 tempCOUNT--;
2389 }
2390 break;
2391 case 4:
2392 while (tempCOUNT >= 33) tempCOUNT -= 33;
2393 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2394 of = ((arg >> 31) ^ cf) & 1;
2395 while (tempCOUNT > 0) {
2396 tempcf = arg & 1;
2397 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
2398 cf = tempcf;
2399 tempCOUNT--;
2400 }
2401 break;
2402 case 2:
2403 while (tempCOUNT >= 17) tempCOUNT -= 17;
2404 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2405 of = ((arg >> 15) ^ cf) & 1;
2406 while (tempCOUNT > 0) {
2407 tempcf = arg & 1;
2408 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
2409 cf = tempcf;
2410 tempCOUNT--;
2411 }
2412 break;
2413 case 1:
2414 while (tempCOUNT >= 9) tempCOUNT -= 9;
2415 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2416 of = ((arg >> 7) ^ cf) & 1;
2417 while (tempCOUNT > 0) {
2418 tempcf = arg & 1;
2419 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
2420 cf = tempcf;
2421 tempCOUNT--;
2422 }
2423 break;
2424 default:
2425 vpanic("calculate_RCR(amd64g): invalid size");
2426 }
2427
2428 cf &= 1;
2429 of &= 1;
2430 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2431 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2432
2433 /* caller can ask to have back either the resulting flags or
2434 resulting value, but not both */
2435 return wantRflags ? rflags_in : arg;
2436 }
2437
2438 ULong amd64g_calculate_RCL ( ULong arg,
2439 ULong rot_amt,
2440 ULong rflags_in,
2441 Long szIN )
2442 {
2443 Bool wantRflags = toBool(szIN < 0);
2444 ULong sz = wantRflags ? (-szIN) : szIN;
2445 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2446 ULong cf=0, of=0, tempcf;
2447
2448 switch (sz) {
2449 case 8:
2450 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2451 while (tempCOUNT > 0) {
2452 tempcf = (arg >> 63) & 1;
2453 arg = (arg << 1) | (cf & 1);
2454 cf = tempcf;
2455 tempCOUNT--;
2456 }
2457 of = ((arg >> 63) ^ cf) & 1;
2458 break;
2459 case 4:
2460 while (tempCOUNT >= 33) tempCOUNT -= 33;
2461 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2462 while (tempCOUNT > 0) {
2463 tempcf = (arg >> 31) & 1;
2464 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
2465 cf = tempcf;
2466 tempCOUNT--;
2467 }
2468 of = ((arg >> 31) ^ cf) & 1;
2469 break;
2470 case 2:
2471 while (tempCOUNT >= 17) tempCOUNT -= 17;
2472 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2473 while (tempCOUNT > 0) {
2474 tempcf = (arg >> 15) & 1;
2475 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
2476 cf = tempcf;
2477 tempCOUNT--;
2478 }
2479 of = ((arg >> 15) ^ cf) & 1;
2480 break;
2481 case 1:
2482 while (tempCOUNT >= 9) tempCOUNT -= 9;
2483 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2484 while (tempCOUNT > 0) {
2485 tempcf = (arg >> 7) & 1;
2486 arg = 0xFFULL & ((arg << 1) | (cf & 1));
2487 cf = tempcf;
2488 tempCOUNT--;
2489 }
2490 of = ((arg >> 7) ^ cf) & 1;
2491 break;
2492 default:
2493 vpanic("calculate_RCL(amd64g): invalid size");
2494 }
2495
2496 cf &= 1;
2497 of &= 1;
2498 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2499 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2500
2501 return wantRflags ? rflags_in : arg;
2502 }
2503
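/* Calling-convention sketch for the two rotate helpers above
   (hypothetical helper, not called from anywhere).  Since each call
   returns either the rotated value or the updated %rflags but not
   both, a caller wanting both must call twice, flipping the sign of
   the size argument. */
static inline void illustrate_rcl_call_convention ( void )
{
   ULong arg = 0x80, amt = 1, rflags_in = 0;       /* CF clear on entry */
   ULong val = amd64g_calculate_RCL( arg, amt, rflags_in,  1 );
   ULong rfl = amd64g_calculate_RCL( arg, amt, rflags_in, -1 );
   vassert(val == 0x00);                           /* bit 7 rotated out */
   vassert(((rfl >> AMD64G_CC_SHIFT_C) & 1) == 1); /* ... and into CF   */
}
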
2504 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
2505 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
2506 */
2507 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
2508 {
2509 ULong hi, lo, tmp, A[16];
2510
2511 A[0] = 0; A[1] = a;
2512 A[2] = A[1] << 1; A[3] = A[2] ^ a;
2513 A[4] = A[2] << 1; A[5] = A[4] ^ a;
2514 A[6] = A[3] << 1; A[7] = A[6] ^ a;
2515 A[8] = A[4] << 1; A[9] = A[8] ^ a;
2516 A[10] = A[5] << 1; A[11] = A[10] ^ a;
2517 A[12] = A[6] << 1; A[13] = A[12] ^ a;
2518 A[14] = A[7] << 1; A[15] = A[14] ^ a;
2519
2520 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
2521 hi = lo >> 56;
2522 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
2523 hi = (hi << 8) | (lo >> 56);
2524 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
2525 hi = (hi << 8) | (lo >> 56);
2526 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
2527 hi = (hi << 8) | (lo >> 56);
2528 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
2529 hi = (hi << 8) | (lo >> 56);
2530 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
2531 hi = (hi << 8) | (lo >> 56);
2532 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
2533 hi = (hi << 8) | (lo >> 56);
2534 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
2535
2536 ULong m0 = -1;
2537 m0 /= 255;
2538 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
2539 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
2540 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
2541 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
2542 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
2543 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
2544 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
2545
2546 return which ? hi : lo;
2547 }
2548
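/* Worked example for the carry-less multiply above (hypothetical
   helper, not called from anywhere).  In GF(2)[x],
   (x + 1) * (x + 1) = x^2 + 1 because the two middle terms cancel,
   so 0b11 clmul 0b11 = 0b101. */
static inline void illustrate_pclmul ( void )
{
   vassert(amd64g_calculate_pclmul(3, 3, 0) == 5);  /* low 64 bits  */
   vassert(amd64g_calculate_pclmul(3, 3, 1) == 0);  /* high 64 bits */
}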
2549
2550 /* CALLED FROM GENERATED CODE */
2551 /* DIRTY HELPER (non-referentially-transparent) */
2552 /* Horrible hack. On non-amd64 platforms, return 1. */
2553 ULong amd64g_dirtyhelper_RDTSC ( void )
2554 {
2555 # if defined(__x86_64__)
2556 UInt eax, edx;
2557 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
2558 return (((ULong)edx) << 32) | ((ULong)eax);
2559 # else
2560 return 1ULL;
2561 # endif
2562 }
2563
2564
2565 /* CALLED FROM GENERATED CODE */
2566 /* DIRTY HELPER (non-referentially-transparent) */
2567 /* Horrible hack. On non-amd64 platforms, return 0. */
2568 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
2569 {
2570 # if defined(__x86_64__)
2571 ULong r = 0;
2572 portno &= 0xFFFF;
2573 switch (sz) {
2574 case 4:
2575 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
2576 : "=a" (r) : "Nd" (portno));
2577 break;
2578 case 2:
2579 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
2580 : "=a" (r) : "Nd" (portno));
2581 break;
2582 case 1:
2583 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
2584 : "=a" (r) : "Nd" (portno));
2585 break;
2586 default:
2587 break; /* note: no 64-bit version of insn exists */
2588 }
2589 return r;
2590 # else
2591 return 0;
2592 # endif
2593 }
2594
2595
2596 /* CALLED FROM GENERATED CODE */
2597 /* DIRTY HELPER (non-referentially-transparent) */
2598 /* Horrible hack. On non-amd64 platforms, do nothing. */
2599 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
2600 {
2601 # if defined(__x86_64__)
2602 portno &= 0xFFFF;
2603 switch (sz) {
2604 case 4:
2605 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
2606 : : "a" (data), "Nd" (portno));
2607 break;
2608 case 2:
2609 __asm__ __volatile__("outw %w0, %w1"
2610 : : "a" (data), "Nd" (portno));
2611 break;
2612 case 1:
2613 __asm__ __volatile__("outb %b0, %w1"
2614 : : "a" (data), "Nd" (portno));
2615 break;
2616 default:
2617 break; /* note: no 64-bit version of insn exists */
2618 }
2619 # else
2620 /* do nothing */
2621 # endif
2622 }
2623
2624 /* CALLED FROM GENERATED CODE */
2625 /* DIRTY HELPER (non-referentially-transparent) */
2626 /* Horrible hack. On non-amd64 platforms, just return a zeroed result. */
2627 /* op = 0: call the native SGDT instruction.
2628 op = 1: call the native SIDT instruction.
2629 */
2630 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
2631 # if defined(__x86_64__)
2632 switch (op) {
2633 case 0:
2634 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2635 break;
2636 case 1:
2637 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2638 break;
2639 default:
2640 vpanic("amd64g_dirtyhelper_SxDT");
2641 }
2642 # else
2643    /* not amd64: just return a zeroed-out 10-byte descriptor */
2644 UChar* p = (UChar*)address;
2645 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2646 p[6] = p[7] = p[8] = p[9] = 0;
2647 # endif
2648 }
2649
2650 /*---------------------------------------------------------------*/
2651 /*--- Helpers for MMX/SSE/SSE2. ---*/
2652 /*---------------------------------------------------------------*/
2653
2654 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2655 return toUChar(xx>yy ? xx-yy : yy-xx);
2656 }
2657
2658 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2659 return (((ULong)w1) << 32) | ((ULong)w0);
2660 }
2661
2662 static inline UShort sel16x4_3 ( ULong w64 ) {
2663 UInt hi32 = toUInt(w64 >> 32);
2664 return toUShort(hi32 >> 16);
2665 }
2666 static inline UShort sel16x4_2 ( ULong w64 ) {
2667 UInt hi32 = toUInt(w64 >> 32);
2668 return toUShort(hi32);
2669 }
2670 static inline UShort sel16x4_1 ( ULong w64 ) {
2671 UInt lo32 = toUInt(w64);
2672 return toUShort(lo32 >> 16);
2673 }
2674 static inline UShort sel16x4_0 ( ULong w64 ) {
2675 UInt lo32 = toUInt(w64);
2676 return toUShort(lo32);
2677 }
2678
2679 static inline UChar sel8x8_7 ( ULong w64 ) {
2680 UInt hi32 = toUInt(w64 >> 32);
2681 return toUChar(hi32 >> 24);
2682 }
2683 static inline UChar sel8x8_6 ( ULong w64 ) {
2684 UInt hi32 = toUInt(w64 >> 32);
2685 return toUChar(hi32 >> 16);
2686 }
2687 static inline UChar sel8x8_5 ( ULong w64 ) {
2688 UInt hi32 = toUInt(w64 >> 32);
2689 return toUChar(hi32 >> 8);
2690 }
2691 static inline UChar sel8x8_4 ( ULong w64 ) {
2692 UInt hi32 = toUInt(w64 >> 32);
2693 return toUChar(hi32 >> 0);
2694 }
2695 static inline UChar sel8x8_3 ( ULong w64 ) {
2696 UInt lo32 = toUInt(w64);
2697 return toUChar(lo32 >> 24);
2698 }
2699 static inline UChar sel8x8_2 ( ULong w64 ) {
2700 UInt lo32 = toUInt(w64);
2701 return toUChar(lo32 >> 16);
2702 }
2703 static inline UChar sel8x8_1 ( ULong w64 ) {
2704 UInt lo32 = toUInt(w64);
2705 return toUChar(lo32 >> 8);
2706 }
2707 static inline UChar sel8x8_0 ( ULong w64 ) {
2708 UInt lo32 = toUInt(w64);
2709 return toUChar(lo32 >> 0);
2710 }
2711
2712 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2713 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2714 {
2715 return
2716 mk32x2(
2717 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2718 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2719 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2720 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2721 );
2722 }
2723
2724 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2725 ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
2726 {
2727 ULong r = 0;
2728 if (xx & (1ULL << (64-1))) r |= (1<<7);
2729 if (xx & (1ULL << (56-1))) r |= (1<<6);
2730 if (xx & (1ULL << (48-1))) r |= (1<<5);
2731 if (xx & (1ULL << (40-1))) r |= (1<<4);
2732 if (xx & (1ULL << (32-1))) r |= (1<<3);
2733 if (xx & (1ULL << (24-1))) r |= (1<<2);
2734 if (xx & (1ULL << (16-1))) r |= (1<<1);
2735 if (xx & (1ULL << ( 8-1))) r |= (1<<0);
2736 return r;
2737 }
2738
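/* Worked example for the helper above (hypothetical helper, not
   called from anywhere): only bytes whose top bit is set contribute
   to the mask -- here byte 7 (0xFF) and byte 0 (0x80). */
static inline void illustrate_mmx_pmovmskb ( void )
{
   vassert(amd64g_calculate_mmx_pmovmskb(0xFF00000000000080ULL) == 0x81);
}
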
2739 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2740 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2741 {
2742 UInt t = 0;
2743 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2744 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2745 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2746 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2747 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2748 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2749 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2750 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2751 t &= 0xFFFF;
2752 return (ULong)t;
2753 }
2754
2755 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2756 ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
2757 {
2758 ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
2759 ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
2760 return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
2761 }
2762
2763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2764 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
2765 {
2766 UInt i;
2767 ULong crc = (b & 0xFFULL) ^ crcIn;
2768 for (i = 0; i < 8; i++)
2769 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2770 return crc;
2771 }
2772
2773 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2774 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
2775 {
2776 UInt i;
2777 ULong crc = (w & 0xFFFFULL) ^ crcIn;
2778 for (i = 0; i < 16; i++)
2779 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2780 return crc;
2781 }
2782
2783 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2784 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
2785 {
2786 UInt i;
2787 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
2788 for (i = 0; i < 32; i++)
2789 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2790 return crc;
2791 }
2792
2793 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2794 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
2795 {
2796 ULong crc = amd64g_calc_crc32l(crcIn, q);
2797 return amd64g_calc_crc32l(crc, q >> 32);
2798 }
2799
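/* Known-answer sketch for the CRC32 helpers above (hypothetical
   helper, not called from anywhere).  They implement the raw
   bit-reflected CRC-32C (Castagnoli) update, as the SSE4.2 CRC32
   instruction does; adding the usual init/final inversion yields the
   standard CRC-32C check value for "123456789". */
static inline void illustrate_crc32c_check_value ( void )
{
   const UChar* s = (const UChar*)"123456789";
   ULong crc = 0xFFFFFFFFULL;
   Int i;
   for (i = 0; i < 9; i++)
      crc = amd64g_calc_crc32b(crc, (ULong)s[i]);
   vassert((crc ^ 0xFFFFFFFFULL) == 0xE3069283ULL);
}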
2800
2801 /*---------------------------------------------------------------*/
2802 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
2803 /*---------------------------------------------------------------*/
2804
2805 static UInt zmask_from_V128 ( V128* arg )
2806 {
2807 UInt i, res = 0;
2808 for (i = 0; i < 16; i++) {
2809 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
2810 }
2811 return res;
2812 }
2813
2814 /* Helps with PCMP{I,E}STR{I,M}.
2815
2816    CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not really dirty;
2817    it could be a clean helper, except that we can't pass 2 x V128 by
2818    value to a clean helper, nor have one returned.)
2819    Reads guest state, writes to guest state for the xSTRM cases,
2820    makes no memory accesses, and is otherwise a pure function.
2821
2822 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
2823 the callee knows which I/E and I/M variant it is dealing with and
2824 what the specific operation is. 4th byte of opcode is in the range
2825 0x60 to 0x63:
2826 istri 66 0F 3A 63
2827 istrm 66 0F 3A 62
2828 estri 66 0F 3A 61
2829 estrm 66 0F 3A 60
2830
2831 gstOffL and gstOffR are the guest state offsets for the two XMM
2832 register inputs. We never have to deal with the memory case since
2833 that is handled by pre-loading the relevant value into the fake
2834 XMM16 register.
2835
2836 For ESTRx variants, edxIN and eaxIN hold the values of those two
2837 registers.
2838
2839 In all cases, the bottom 16 bits of the result contain the new
2840 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
2841 result hold the new %ecx value. For xSTRM variants, the helper
2842 writes the result directly to the guest XMM0.
2843
2844 Declarable side effects: in all cases, reads guest state at
2845 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
2846 guest_XMM0.
2847
2848 Is expected to be called with opc_and_imm combinations which have
2849    actually been validated, and will assert otherwise. The front
2850 end should ensure we're only called with verified values.
2851 */
2852 ULong amd64g_dirtyhelper_PCMPxSTRx (
2853 VexGuestAMD64State* gst,
2854 HWord opc4_and_imm,
2855 HWord gstOffL, HWord gstOffR,
2856 HWord edxIN, HWord eaxIN
2857 )
2858 {
2859 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
2860 HWord imm8 = opc4_and_imm & 0xFF;
2861 HWord isISTRx = opc4 & 2;
2862 HWord isxSTRM = (opc4 & 1) ^ 1;
2863 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
2864 vassert((imm8 & 1) == 0); /* we support byte-size cases only */
2865
2866 // where the args are
2867 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
2868 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
2869
2870 /* Create the arg validity masks, either from the vectors
2871 themselves or from the supplied edx/eax values. */
2872 // FIXME: this is only right for the 8-bit data cases.
2873 // At least that is asserted above.
2874 UInt zmaskL, zmaskR;
2875 if (isISTRx) {
2876 zmaskL = zmask_from_V128(argL);
2877 zmaskR = zmask_from_V128(argR);
2878 } else {
2879 Int tmp;
2880 tmp = edxIN & 0xFFFFFFFF;
2881 if (tmp < -16) tmp = -16;
2882 if (tmp > 16) tmp = 16;
2883 if (tmp < 0) tmp = -tmp;
2884 vassert(tmp >= 0 && tmp <= 16);
2885 zmaskL = (1 << tmp) & 0xFFFF;
2886 tmp = eaxIN & 0xFFFFFFFF;
2887 if (tmp < -16) tmp = -16;
2888 if (tmp > 16) tmp = 16;
2889 if (tmp < 0) tmp = -tmp;
2890 vassert(tmp >= 0 && tmp <= 16);
2891 zmaskR = (1 << tmp) & 0xFFFF;
2892 }
2893
2894 // temp spot for the resulting flags and vector.
2895 V128 resV;
2896 UInt resOSZACP;
2897
2898    // do the math
2899 Bool ok = compute_PCMPxSTRx (
2900 &resV, &resOSZACP, argL, argR,
2901 zmaskL, zmaskR, imm8, (Bool)isxSTRM
2902 );
2903
2904 // front end shouldn't pass us any imm8 variants we can't
2905 // handle. Hence:
2906 vassert(ok);
2907
2908 // So, finally we need to get the results back to the caller.
2909 // In all cases, the new OSZACP value is the lowest 16 of
2910 // the return value.
2911 if (isxSTRM) {
2912       /* gst->guest_XMM0 = resV; */ // gcc doesn't like that
2913 gst->guest_XMM0[0] = resV.w32[0];
2914 gst->guest_XMM0[1] = resV.w32[1];
2915 gst->guest_XMM0[2] = resV.w32[2];
2916 gst->guest_XMM0[3] = resV.w32[3];
2917 return resOSZACP & 0x8D5;
2918 } else {
2919 UInt newECX = resV.w32[0] & 0xFFFF;
2920 return (newECX << 16) | (resOSZACP & 0x8D5);
2921 }
2922 }
2923
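/* Result-unpacking sketch (hypothetical helpers, not called from
   anywhere; the real consumer is the IR built by toIR.c).  For the
   xSTRI variants the new %ecx value sits in bits [31:16] of the
   returned word and the new OSZACP flags in bits [15:0]; for the
   xSTRM variants only the flag bits are meaningful, since the vector
   result goes straight to guest_XMM0. */
static inline UInt pcmpxstri_new_ecx ( ULong res ) {
   return toUInt((res >> 16) & 0xFFFF);
}
static inline UInt pcmpxstrx_new_oszacp ( ULong res ) {
   return toUInt(res & 0x8D5);
}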
2924
2925 /*---------------------------------------------------------------*/
2926 /*--- Helpers for dealing with, and describing, ---*/
2927 /*--- guest state as a whole. ---*/
2928 /*---------------------------------------------------------------*/
2929
2930 /* Initialise the entire amd64 guest state. */
2931 /* VISIBLE TO LIBVEX CLIENT */
2932 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
2933 {
2934 vex_state->guest_RAX = 0;
2935 vex_state->guest_RCX = 0;
2936 vex_state->guest_RDX = 0;
2937 vex_state->guest_RBX = 0;
2938 vex_state->guest_RSP = 0;
2939 vex_state->guest_RBP = 0;
2940 vex_state->guest_RSI = 0;
2941 vex_state->guest_RDI = 0;
2942 vex_state->guest_R8 = 0;
2943 vex_state->guest_R9 = 0;
2944 vex_state->guest_R10 = 0;
2945 vex_state->guest_R11 = 0;
2946 vex_state->guest_R12 = 0;
2947 vex_state->guest_R13 = 0;
2948 vex_state->guest_R14 = 0;
2949 vex_state->guest_R15 = 0;
2950
2951 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
2952 vex_state->guest_CC_DEP1 = 0;
2953 vex_state->guest_CC_DEP2 = 0;
2954 vex_state->guest_CC_NDEP = 0;
2955
2956 vex_state->guest_DFLAG = 1; /* forwards */
2957 vex_state->guest_IDFLAG = 0;
2958
2959 /* HACK: represent the offset associated with %fs==0. This
2960 assumes that %fs is only ever zero. */
2961 vex_state->guest_FS_ZERO = 0;
2962
2963 vex_state->guest_RIP = 0;
2964
2965 /* Initialise the simulated FPU */
2966 amd64g_dirtyhelper_FINIT( vex_state );
2967
2968 /* Initialise the SSE state. */
2969 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2970
2971 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
2972 SSEZERO(vex_state->guest_XMM0);
2973 SSEZERO(vex_state->guest_XMM1);
2974 SSEZERO(vex_state->guest_XMM2);
2975 SSEZERO(vex_state->guest_XMM3);
2976 SSEZERO(vex_state->guest_XMM4);
2977 SSEZERO(vex_state->guest_XMM5);
2978 SSEZERO(vex_state->guest_XMM6);
2979 SSEZERO(vex_state->guest_XMM7);
2980 SSEZERO(vex_state->guest_XMM8);
2981 SSEZERO(vex_state->guest_XMM9);
2982 SSEZERO(vex_state->guest_XMM10);
2983 SSEZERO(vex_state->guest_XMM11);
2984 SSEZERO(vex_state->guest_XMM12);
2985 SSEZERO(vex_state->guest_XMM13);
2986 SSEZERO(vex_state->guest_XMM14);
2987 SSEZERO(vex_state->guest_XMM15);
2988 SSEZERO(vex_state->guest_XMM16);
2989
2990 # undef SSEZERO
2991
2992 vex_state->guest_EMWARN = EmWarn_NONE;
2993
2994 /* These should not ever be either read or written, but we
2995 initialise them anyway. */
2996 vex_state->guest_TISTART = 0;
2997 vex_state->guest_TILEN = 0;
2998
2999 vex_state->guest_NRADDR = 0;
3000 vex_state->guest_SC_CLASS = 0;
3001 vex_state->guest_GS_0x60 = 0;
3002
3003 vex_state->guest_IP_AT_SYSCALL = 0;
3004 /* vex_state->padding = 0; */
3005 }
3006
3007
3008 /* Figure out if any part of the guest state contained in minoff
3009 .. maxoff requires precise memory exceptions. If in doubt return
3010    True (but this generates significantly slower code).
3011
3012 By default we enforce precise exns for guest %RSP, %RBP and %RIP
3013 only. These are the minimum needed to extract correct stack
3014 backtraces from amd64 code.
3015 */
3016 Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
3017 Int maxoff)
3018 {
3019 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
3020 Int rbp_max = rbp_min + 8 - 1;
3021 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
3022 Int rsp_max = rsp_min + 8 - 1;
3023 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
3024 Int rip_max = rip_min + 8 - 1;
3025
3026 if (maxoff < rbp_min || minoff > rbp_max) {
3027 /* no overlap with rbp */
3028 } else {
3029 return True;
3030 }
3031
3032 if (maxoff < rsp_min || minoff > rsp_max) {
3033 /* no overlap with rsp */
3034 } else {
3035 return True;
3036 }
3037
3038 if (maxoff < rip_min || minoff > rip_max) {
3039       /* no overlap with rip */
3040 } else {
3041 return True;
3042 }
3043
3044 return False;
3045 }
3046
3047
3048 #define ALWAYSDEFD(field) \
3049 { offsetof(VexGuestAMD64State, field), \
3050 (sizeof ((VexGuestAMD64State*)0)->field) }
3051
3052 VexGuestLayout
3053 amd64guest_layout
3054 = {
3055 /* Total size of the guest state, in bytes. */
3056 .total_sizeB = sizeof(VexGuestAMD64State),
3057
3058 /* Describe the stack pointer. */
3059 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
3060 .sizeof_SP = 8,
3061
3062 /* Describe the frame pointer. */
3063 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
3064 .sizeof_FP = 8,
3065
3066 /* Describe the instruction pointer. */
3067 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
3068 .sizeof_IP = 8,
3069
3070 /* Describe any sections to be regarded by Memcheck as
3071 'always-defined'. */
3072 .n_alwaysDefd = 16,
3073
3074 /* flags thunk: OP and NDEP are always defd, whereas DEP1
3075 and DEP2 have to be tracked. See detailed comment in
3076 gdefs.h on meaning of thunk fields. */
3077 .alwaysDefd
3078 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
3079 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
3080 /* 2 */ ALWAYSDEFD(guest_DFLAG),
3081 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
3082 /* 4 */ ALWAYSDEFD(guest_RIP),
3083 /* 5 */ ALWAYSDEFD(guest_FS_ZERO),
3084 /* 6 */ ALWAYSDEFD(guest_FTOP),
3085 /* 7 */ ALWAYSDEFD(guest_FPTAG),
3086 /* 8 */ ALWAYSDEFD(guest_FPROUND),
3087 /* 9 */ ALWAYSDEFD(guest_FC3210),
3088 // /* */ ALWAYSDEFD(guest_CS),
3089 // /* */ ALWAYSDEFD(guest_DS),
3090 // /* */ ALWAYSDEFD(guest_ES),
3091 // /* */ ALWAYSDEFD(guest_FS),
3092 // /* */ ALWAYSDEFD(guest_GS),
3093 // /* */ ALWAYSDEFD(guest_SS),
3094 // /* */ ALWAYSDEFD(guest_LDT),
3095 // /* */ ALWAYSDEFD(guest_GDT),
3096 /* 10 */ ALWAYSDEFD(guest_EMWARN),
3097 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
3098 /* 12 */ ALWAYSDEFD(guest_TISTART),
3099 /* 13 */ ALWAYSDEFD(guest_TILEN),
3100 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
3101 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
3102 }
3103 };
3104
3105
3106 /*---------------------------------------------------------------*/
3107 /*--- end guest_amd64_helpers.c ---*/
3108 /*---------------------------------------------------------------*/
3109