1
2 /*---------------------------------------------------------------*/
3 /*--- begin guest_x86_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2012 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_emwarn.h"
38 #include "libvex_guest_x86.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41
42 #include "main_util.h"
43 #include "guest_generic_bb_to_IR.h"
44 #include "guest_x86_defs.h"
45 #include "guest_generic_x87.h"
46
47
48 /* This file contains helper functions for x86 guest code.
49 Calls to these functions are generated by the back end.
50 These calls are of course in the host machine code and
51 this file will be compiled to host machine code, so that
52 all makes sense.
53
54 Only change the signatures of these helper functions very
55 carefully. If you change the signature here, you'll have to change
56 the parameters passed to it in the IR calls constructed by
57 guest-x86/toIR.c.
58
59 The convention used is that all functions called from generated
60 code are named x86g_<something>, and any function whose name lacks
61 that prefix is not called from generated code. Note that some
62 LibVEX_* functions can however be called by VEX's client, but that
63 is not the same as calling them from VEX-generated code.
64 */
65
66
67 /* Set to 1 to get detailed profiling info about use of the flag
68 machinery. */
69 #define PROFILE_EFLAGS 0
70
71
72 /*---------------------------------------------------------------*/
73 /*--- %eflags run-time helpers. ---*/
74 /*---------------------------------------------------------------*/
75
/* Lookup table implementing the x86 parity flag (PF).
   parity_table[b] == X86G_CC_MASK_P exactly when the byte b contains
   an even number of 1-bits (PF is set on even parity of the result's
   low byte); otherwise the entry is 0.  Indexed by the low 8 bits of
   a result, e.g. parity_table[(UChar)res]. */
static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
110
111 /* generalised left-shifter */
lshift(Int x,Int n)112 inline static Int lshift ( Int x, Int n )
113 {
114 if (n >= 0)
115 return x << n;
116 else
117 return x >> (-n);
118 }
119
/* Identity on ULong.  Used below as the (no-op) narrowing function
   for the 32-bit UMUL/SMUL cases, where the double-width product is
   already a ULong and needs no truncation. */
static inline ULong idULong ( ULong x )
{
   return x;
}
125
126
/* Common prologue for every ACTIONS_* macro below.  Binds the three
   thunk formals (cc_dep1_formal/cc_dep2_formal/cc_ndep_formal, the
   parameters of x86g_calculate_eflags_all_WRK) to the local names
   CC_DEP1/CC_DEP2/CC_NDEP, and computes DATA_MASK / SIGN_MASK for
   the operation's width (8, 16 or 32 bits). */
#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1);         \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
143
144
145 /*-------------------------------------------------------------*/
146
/* Flag computation for ADD.  Thunk: DEP1 = first operand, DEP2 =
   second operand, NDEP unused.  Assembles all six flags in their
   native %eflags positions: CF in bit 0 (unsigned wraparound), PF
   from the parity table, AF in bit 4, ZF in bit 6, SF in bit 7, OF
   via X86G_CC_MASK_O (operands same sign, result different sign). */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, res;                                       \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
165
166 /*-------------------------------------------------------------*/
167
/* Flag computation for SUB/CMP.  Thunk: DEP1 = argL, DEP2 = argR,
   NDEP unused; res = argL - argR.  CF is the unsigned borrow
   (argL <u argR); OF set when the operands' signs differ and the
   result's sign differs from argL's. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, res;                                       \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
186
187 /*-------------------------------------------------------------*/
188
/* Flag computation for ADC (add with carry-in).  Thunk: DEP1 = argL,
   NDEP = old flags (only the C bit is consulted).  Note argR is
   recovered as CC_DEP2 ^ oldC before use — DEP2 is evidently stored
   xor'd with the old carry by the front end; confirm against
   toIR.c.  CF uses <= rather than < when a carry came in, since
   res == argL then still implies a carry out. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, oldC, res;                                 \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
211
212 /*-------------------------------------------------------------*/
213
/* Flag computation for SBB (subtract with borrow-in).  Thunk: DEP1 =
   argL, NDEP = old flags (only the C bit is consulted).  As with
   ADC, argR arrives as CC_DEP2 ^ oldC and is un-xor'd first —
   confirm against toIR.c.  CF uses <= when a borrow came in. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, oldC, res;                                 \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
236
237 /*-------------------------------------------------------------*/
238
/* Flag computation for logical ops (AND/OR/XOR).  Thunk: DEP1 = the
   result, DEP2 and NDEP unused.  CF, AF and OF are always zero;
   PF/ZF/SF derive from the result. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
252
253 /*-------------------------------------------------------------*/
254
/* Flag computation for INC.  Thunk: DEP1 = the result, NDEP = old
   flags.  INC leaves CF unchanged, so CF is taken from NDEP.  OF
   (bit 11) is set exactly when the result equals the smallest
   negative value for the width, i.e. the increment overflowed from
   the largest positive value. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, res;                                       \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
272
273 /*-------------------------------------------------------------*/
274
/* Flag computation for DEC.  Thunk: DEP1 = the result, NDEP = old
   flags.  DEC leaves CF unchanged, so CF is taken from NDEP.  OF is
   set exactly when the result equals the largest positive value for
   the width (SIGN_MASK - 1), i.e. the decrement overflowed from the
   smallest negative value. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     Int argL, argR, res;                                       \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
293
294 /*-------------------------------------------------------------*/
295
/* Flag computation for SHL.  Thunk: DEP1 = final result, DEP2
   supplies CF as its msb — presumably the value shifted one place
   less than the result; confirm against toIR.c.  AF is undefined
   after shifts and is returned as 0; OF is only architecturally
   defined for a shift count of 1. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
311
312 /*-------------------------------------------------------------*/
313
/* Flag computation for SHR/SAR.  Thunk: DEP1 = final result, DEP2
   supplies CF as its lsb — presumably the value shifted one place
   less than the result; confirm against toIR.c.  AF is undefined
   after shifts and is returned as 0; OF is only architecturally
   defined for a shift count of 1. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
329
330 /*-------------------------------------------------------------*/
331
/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags.  Rotates only update C and O;
   every other flag is carried over unchanged from NDEP, hence the
   mask-and-merge below. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int fl                                                     \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
346
347 /*-------------------------------------------------------------*/
348
/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags.  As with ROL, only C and O are
   recomputed; the remaining flags are taken verbatim from NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int fl                                                     \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
363
364 /*-------------------------------------------------------------*/
365
/* Flag computation for unsigned MUL.  Thunk: DEP1/DEP2 = the two
   operands.  Computes both the narrow product (lo) and the
   double-width product (rr); CF and OF are set exactly when the
   high half of the double-width product is nonzero, i.e. the
   result did not fit in the narrow width.  AF is undefined. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
389
390 /*-------------------------------------------------------------*/
391
/* Flag computation for signed IMUL.  Thunk: DEP1/DEP2 = the two
   operands.  CF and OF are set exactly when the high half of the
   double-width product is not the sign-extension of the narrow
   product (lo >> (DATA_BITS-1)), i.e. the signed result did not
   fit in the narrow width.  AF is undefined. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Int cf, pf, af, zf, sf, of;                                \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
                     * ((DATA_STYPE)CC_DEP2) );                 \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
415
416
417 #if PROFILE_EFLAGS
418
/* Profiling state, only compiled when PROFILE_EFLAGS is set.
   'initted' records whether the counter arrays below have been
   zeroed by initCounts(). */
static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all = 0;
static UInt n_calc_c = 0;
static UInt n_calc_cond = 0;

/* True on every 2^22'th call in total (mask 0x3FFFFF), so the stats
   are dumped periodically rather than on every call. */
#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
433
434
/* Dump the profiling counters: the three total call counts, then one
   row per CC_OP showing the slow/fast C-flag computation counts
   followed by the 16 per-condition counts from calculate_cond.
   Counts of 1000 or more are shown in units of K. */
static void showCounts ( void )
{
   Int op, co;
   Char ch;
   vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST O NO B NB Z NZ BE NBE"
              " S NS P NP L NL LE NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      /* Tag each op with its width: after COPY (op 0) the ops come
         in Byte/Word/Long triples. */
      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("      ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}
474
initCounts(void)475 static void initCounts ( void )
476 {
477 Int op, co;
478 initted = True;
479 for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
480 tabc_fast[op] = tabc_slow[op] = 0;
481 for (co = 0; co < 16; co++)
482 tab_cond[op][co] = 0;
483 }
484 }
485
486 #endif /* PROFILE_EFLAGS */
487
488
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code; the
   clean-helper wrappers below (calculate_eflags_all / _c /
   _condition) funnel into it.  cc_op selects one of the ACTIONS_*
   macros above, each of which returns the assembled flag word
   directly; the width suffix (B/W/L) picks the 8/16/32-bit
   instantiation. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      /* COPY: DEP1 already holds a flags value; just mask it down to
         the OSZACP group. */
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
570
571
572 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
573 /* Calculate all the 6 flags from the supplied thunk parameters. */
x86g_calculate_eflags_all(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)574 UInt x86g_calculate_eflags_all ( UInt cc_op,
575 UInt cc_dep1,
576 UInt cc_dep2,
577 UInt cc_ndep )
578 {
579 # if PROFILE_EFLAGS
580 if (!initted) initCounts();
581 n_calc_all++;
582 if (SHOW_COUNTS_NOW) showCounts();
583 # endif
584 return
585 x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
586 }
587
588
589 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
590 /* Calculate just the carry flag from the supplied thunk parameters. */
591 VEX_REGPARM(3)
x86g_calculate_eflags_c(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)592 UInt x86g_calculate_eflags_c ( UInt cc_op,
593 UInt cc_dep1,
594 UInt cc_dep2,
595 UInt cc_ndep )
596 {
597 # if PROFILE_EFLAGS
598 if (!initted) initCounts();
599 n_calc_c++;
600 tabc_fast[cc_op]++;
601 if (SHOW_COUNTS_NOW) showCounts();
602 # endif
603
604 /* Fast-case some common ones. */
605 switch (cc_op) {
606 case X86G_CC_OP_LOGICL:
607 case X86G_CC_OP_LOGICW:
608 case X86G_CC_OP_LOGICB:
609 return 0;
610 case X86G_CC_OP_SUBL:
611 return ((UInt)cc_dep1) < ((UInt)cc_dep2)
612 ? X86G_CC_MASK_C : 0;
613 case X86G_CC_OP_SUBW:
614 return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
615 ? X86G_CC_MASK_C : 0;
616 case X86G_CC_OP_SUBB:
617 return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
618 ? X86G_CC_MASK_C : 0;
619 case X86G_CC_OP_INCL:
620 case X86G_CC_OP_DECL:
621 return cc_ndep & X86G_CC_MASK_C;
622 default:
623 break;
624 }
625
626 # if PROFILE_EFLAGS
627 tabc_fast[cc_op]--;
628 tabc_slow[cc_op]++;
629 # endif
630
631 return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
632 & X86G_CC_MASK_C;
633 }
634
635
636 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
637 /* returns 1 or 0 */
x86g_calculate_condition(UInt cond,UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)638 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
639 UInt cc_op,
640 UInt cc_dep1,
641 UInt cc_dep2,
642 UInt cc_ndep )
643 {
644 UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
645 cc_dep2, cc_ndep);
646 UInt of,sf,zf,cf,pf;
647 UInt inv = cond & 1;
648
649 # if PROFILE_EFLAGS
650 if (!initted) initCounts();
651 tab_cond[cc_op][cond]++;
652 n_calc_cond++;
653 if (SHOW_COUNTS_NOW) showCounts();
654 # endif
655
656 switch (cond) {
657 case X86CondNO:
658 case X86CondO: /* OF == 1 */
659 of = eflags >> X86G_CC_SHIFT_O;
660 return 1 & (inv ^ of);
661
662 case X86CondNZ:
663 case X86CondZ: /* ZF == 1 */
664 zf = eflags >> X86G_CC_SHIFT_Z;
665 return 1 & (inv ^ zf);
666
667 case X86CondNB:
668 case X86CondB: /* CF == 1 */
669 cf = eflags >> X86G_CC_SHIFT_C;
670 return 1 & (inv ^ cf);
671 break;
672
673 case X86CondNBE:
674 case X86CondBE: /* (CF or ZF) == 1 */
675 cf = eflags >> X86G_CC_SHIFT_C;
676 zf = eflags >> X86G_CC_SHIFT_Z;
677 return 1 & (inv ^ (cf | zf));
678 break;
679
680 case X86CondNS:
681 case X86CondS: /* SF == 1 */
682 sf = eflags >> X86G_CC_SHIFT_S;
683 return 1 & (inv ^ sf);
684
685 case X86CondNP:
686 case X86CondP: /* PF == 1 */
687 pf = eflags >> X86G_CC_SHIFT_P;
688 return 1 & (inv ^ pf);
689
690 case X86CondNL:
691 case X86CondL: /* (SF xor OF) == 1 */
692 sf = eflags >> X86G_CC_SHIFT_S;
693 of = eflags >> X86G_CC_SHIFT_O;
694 return 1 & (inv ^ (sf ^ of));
695 break;
696
697 case X86CondNLE:
698 case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
699 sf = eflags >> X86G_CC_SHIFT_S;
700 of = eflags >> X86G_CC_SHIFT_O;
701 zf = eflags >> X86G_CC_SHIFT_Z;
702 return 1 & (inv ^ ((sf ^ of) | zf));
703 break;
704
705 default:
706 /* shouldn't really make these calls from generated code */
707 vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
708 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
709 vpanic("x86g_calculate_condition");
710 }
711 }
712
713
714 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_get_eflags(VexGuestX86State * vex_state)715 UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state )
716 {
717 UInt eflags = x86g_calculate_eflags_all_WRK(
718 vex_state->guest_CC_OP,
719 vex_state->guest_CC_DEP1,
720 vex_state->guest_CC_DEP2,
721 vex_state->guest_CC_NDEP
722 );
723 UInt dflag = vex_state->guest_DFLAG;
724 vassert(dflag == 1 || dflag == 0xFFFFFFFF);
725 if (dflag == 0xFFFFFFFF)
726 eflags |= (1<<10);
727 if (vex_state->guest_IDFLAG == 1)
728 eflags |= (1<<21);
729 if (vex_state->guest_ACFLAG == 1)
730 eflags |= (1<<18);
731
732 return eflags;
733 }
734
735 /* VISIBLE TO LIBVEX CLIENT */
736 void
LibVEX_GuestX86_put_eflag_c(UInt new_carry_flag,VexGuestX86State * vex_state)737 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
738 /*MOD*/VexGuestX86State* vex_state )
739 {
740 UInt oszacp = x86g_calculate_eflags_all_WRK(
741 vex_state->guest_CC_OP,
742 vex_state->guest_CC_DEP1,
743 vex_state->guest_CC_DEP2,
744 vex_state->guest_CC_NDEP
745 );
746 if (new_carry_flag & 1) {
747 oszacp |= X86G_CC_MASK_C;
748 } else {
749 oszacp &= ~X86G_CC_MASK_C;
750 }
751 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
752 vex_state->guest_CC_DEP1 = oszacp;
753 vex_state->guest_CC_DEP2 = 0;
754 vex_state->guest_CC_NDEP = 0;
755 }
756
757
758 /*---------------------------------------------------------------*/
759 /*--- %eflags translation-time function specialisers. ---*/
760 /*--- These help iropt specialise calls the above run-time ---*/
761 /*--- %eflags functions. ---*/
762 /*---------------------------------------------------------------*/
763
764 /* Used by the optimiser to try specialisations. Returns an
765 equivalent expression, or NULL if none. */
766
isU32(IRExpr * e,UInt n)767 static inline Bool isU32 ( IRExpr* e, UInt n )
768 {
769 return
770 toBool( e->tag == Iex_Const
771 && e->Iex.Const.con->tag == Ico_U32
772 && e->Iex.Const.con->Ico.U32 == n );
773 }
774
guest_x86_spechelper(HChar * function_name,IRExpr ** args,IRStmt ** precedingStmts,Int n_precedingStmts)775 IRExpr* guest_x86_spechelper ( HChar* function_name,
776 IRExpr** args,
777 IRStmt** precedingStmts,
778 Int n_precedingStmts )
779 {
780 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
781 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
782 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
783 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
784
785 Int i, arity = 0;
786 for (i = 0; args[i]; i++)
787 arity++;
788 # if 0
789 vex_printf("spec request:\n");
790 vex_printf(" %s ", function_name);
791 for (i = 0; i < arity; i++) {
792 vex_printf(" ");
793 ppIRExpr(args[i]);
794 }
795 vex_printf("\n");
796 # endif
797
798 /* --------- specialising "x86g_calculate_condition" --------- */
799
800 if (vex_streq(function_name, "x86g_calculate_condition")) {
801 /* specialise calls to above "calculate condition" function */
802 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
803 vassert(arity == 5);
804 cond = args[0];
805 cc_op = args[1];
806 cc_dep1 = args[2];
807 cc_dep2 = args[3];
808
809 /*---------------- ADDL ----------------*/
810
811 if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
812 /* long add, then Z --> test (dst+src == 0) */
813 return unop(Iop_1Uto32,
814 binop(Iop_CmpEQ32,
815 binop(Iop_Add32, cc_dep1, cc_dep2),
816 mkU32(0)));
817 }
818
819 /*---------------- SUBL ----------------*/
820
821 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
822 /* long sub/cmp, then Z --> test dst==src */
823 return unop(Iop_1Uto32,
824 binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
825 }
826 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
827 /* long sub/cmp, then NZ --> test dst!=src */
828 return unop(Iop_1Uto32,
829 binop(Iop_CmpNE32, cc_dep1, cc_dep2));
830 }
831
832 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
833 /* long sub/cmp, then L (signed less than)
834 --> test dst <s src */
835 return unop(Iop_1Uto32,
836 binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
837 }
838 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
839 /* long sub/cmp, then NL (signed greater than or equal)
840 --> test !(dst <s src) */
841 return binop(Iop_Xor32,
842 unop(Iop_1Uto32,
843 binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
844 mkU32(1));
845 }
846
847 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
848 /* long sub/cmp, then LE (signed less than or equal)
849 --> test dst <=s src */
850 return unop(Iop_1Uto32,
851 binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
852 }
853 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
854 /* long sub/cmp, then NLE (signed not less than or equal)
855 --> test dst >s src
856 --> test !(dst <=s src) */
857 return binop(Iop_Xor32,
858 unop(Iop_1Uto32,
859 binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
860 mkU32(1));
861 }
862
863 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
864 /* long sub/cmp, then BE (unsigned less than or equal)
865 --> test dst <=u src */
866 return unop(Iop_1Uto32,
867 binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
868 }
869 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
870 /* long sub/cmp, then BE (unsigned greater than)
871 --> test !(dst <=u src) */
872 return binop(Iop_Xor32,
873 unop(Iop_1Uto32,
874 binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
875 mkU32(1));
876 }
877
878 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
879 /* long sub/cmp, then B (unsigned less than)
880 --> test dst <u src */
881 return unop(Iop_1Uto32,
882 binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
883 }
884 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
885 /* long sub/cmp, then NB (unsigned greater than or equal)
886 --> test !(dst <u src) */
887 return binop(Iop_Xor32,
888 unop(Iop_1Uto32,
889 binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
890 mkU32(1));
891 }
892
893 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
894 /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
895 return unop(Iop_1Uto32,
896 binop(Iop_CmpLT32S,
897 binop(Iop_Sub32, cc_dep1, cc_dep2),
898 mkU32(0)));
899 }
900 if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
901 /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
902 return binop(Iop_Xor32,
903 unop(Iop_1Uto32,
904 binop(Iop_CmpLT32S,
905 binop(Iop_Sub32, cc_dep1, cc_dep2),
906 mkU32(0))),
907 mkU32(1));
908 }
909
910 /*---------------- SUBW ----------------*/
911
912 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
913 /* word sub/cmp, then Z --> test dst==src */
914 return unop(Iop_1Uto32,
915 binop(Iop_CmpEQ16,
916 unop(Iop_32to16,cc_dep1),
917 unop(Iop_32to16,cc_dep2)));
918 }
919 if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
920 /* word sub/cmp, then NZ --> test dst!=src */
921 return unop(Iop_1Uto32,
922 binop(Iop_CmpNE16,
923 unop(Iop_32to16,cc_dep1),
924 unop(Iop_32to16,cc_dep2)));
925 }
926
927 /*---------------- SUBB ----------------*/
928
929 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
930 /* byte sub/cmp, then Z --> test dst==src */
931 return unop(Iop_1Uto32,
932 binop(Iop_CmpEQ8,
933 unop(Iop_32to8,cc_dep1),
934 unop(Iop_32to8,cc_dep2)));
935 }
936 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
937 /* byte sub/cmp, then NZ --> test dst!=src */
938 return unop(Iop_1Uto32,
939 binop(Iop_CmpNE8,
940 unop(Iop_32to8,cc_dep1),
941 unop(Iop_32to8,cc_dep2)));
942 }
943
944 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
945 /* byte sub/cmp, then NBE (unsigned greater than)
946 --> test src <u dst */
947 /* Note, args are opposite way round from the usual */
948 return unop(Iop_1Uto32,
949 binop(Iop_CmpLT32U,
950 binop(Iop_And32,cc_dep2,mkU32(0xFF)),
951 binop(Iop_And32,cc_dep1,mkU32(0xFF))));
952 }
953
954 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
955 && isU32(cc_dep2, 0)) {
956 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
957 --> test dst <s 0
958 --> (UInt)dst[7]
959 This is yet another scheme by which gcc figures out if the
960 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
961 /* Note: isU32(cc_dep2, 0) is correct, even though this is
962 for an 8-bit comparison, since the args to the helper
963 function are always U32s. */
964 return binop(Iop_And32,
965 binop(Iop_Shr32,cc_dep1,mkU8(7)),
966 mkU32(1));
967 }
968 if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
969 && isU32(cc_dep2, 0)) {
970 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
971 --> test !(dst <s 0)
972 --> (UInt) !dst[7]
973 */
974 return binop(Iop_Xor32,
975 binop(Iop_And32,
976 binop(Iop_Shr32,cc_dep1,mkU8(7)),
977 mkU32(1)),
978 mkU32(1));
979 }
980
981 /*---------------- LOGICL ----------------*/
982
983 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
984 /* long and/or/xor, then Z --> test dst==0 */
985 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
986 }
987 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
988 /* long and/or/xor, then NZ --> test dst!=0 */
989 return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
990 }
991
992 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
993 /* long and/or/xor, then LE
994 This is pretty subtle. LOGIC sets SF and ZF according to the
995 result and makes OF be zero. LE computes (SZ ^ OF) | ZF, but
996 OF is zero, so this reduces to SZ | ZF -- which will be 1 iff
997 the result is <=signed 0. Hence ...
998 */
999 return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
1000 }
1001
1002 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
1003 /* long and/or/xor, then BE
1004 LOGIC sets ZF according to the result and makes CF be zero.
1005 BE computes (CF | ZF), but CF is zero, so this reduces ZF
1006 -- which will be 1 iff the result is zero. Hence ...
1007 */
1008 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1009 }
1010
1011 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
1012 /* see comment below for (LOGICB, CondS) */
1013 /* long and/or/xor, then S --> (UInt)result[31] */
1014 return binop(Iop_And32,
1015 binop(Iop_Shr32,cc_dep1,mkU8(31)),
1016 mkU32(1));
1017 }
1018 if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
1019 /* see comment below for (LOGICB, CondNS) */
1020 /* long and/or/xor, then S --> (UInt) ~ result[31] */
1021 return binop(Iop_Xor32,
1022 binop(Iop_And32,
1023 binop(Iop_Shr32,cc_dep1,mkU8(31)),
1024 mkU32(1)),
1025 mkU32(1));
1026 }
1027
1028 /*---------------- LOGICW ----------------*/
1029
1030 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
1031 /* word and/or/xor, then Z --> test dst==0 */
1032 return unop(Iop_1Uto32,
1033 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
1034 mkU32(0)));
1035 }
1036
1037 if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
1038 /* see comment below for (LOGICB, CondS) */
1039 /* word and/or/xor, then S --> (UInt)result[15] */
1040 return binop(Iop_And32,
1041 binop(Iop_Shr32,cc_dep1,mkU8(15)),
1042 mkU32(1));
1043 }
1044
1045 /*---------------- LOGICB ----------------*/
1046
1047 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
1048 /* byte and/or/xor, then Z --> test dst==0 */
1049 return unop(Iop_1Uto32,
1050 binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
1051 mkU32(0)));
1052 }
1053 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
1054 /* byte and/or/xor, then Z --> test dst!=0 */
1055 /* b9ac9: 84 c0 test %al,%al
1056 b9acb: 75 0d jne b9ada */
1057 return unop(Iop_1Uto32,
1058 binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
1059 mkU32(0)));
1060 }
1061
1062 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
1063 /* this is an idiom gcc sometimes uses to find out if the top
1064 bit of a byte register is set: eg testb %al,%al; js ..
1065 Since it just depends on the top bit of the byte, extract
1066 that bit and explicitly get rid of all the rest. This
1067 helps memcheck avoid false positives in the case where any
1068 of the other bits in the byte are undefined. */
1069 /* byte and/or/xor, then S --> (UInt)result[7] */
1070 return binop(Iop_And32,
1071 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1072 mkU32(1));
1073 }
1074 if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
1075 /* ditto, for negation-of-S. */
1076 /* byte and/or/xor, then S --> (UInt) ~ result[7] */
1077 return binop(Iop_Xor32,
1078 binop(Iop_And32,
1079 binop(Iop_Shr32,cc_dep1,mkU8(7)),
1080 mkU32(1)),
1081 mkU32(1));
1082 }
1083
1084 /*---------------- DECL ----------------*/
1085
1086 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
1087 /* dec L, then Z --> test dst == 0 */
1088 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1089 }
1090
1091 if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
1092 /* dec L, then S --> compare DST <s 0 */
1093 return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
1094 }
1095
1096 /*---------------- DECW ----------------*/
1097
1098 if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
1099 /* dec W, then Z --> test dst == 0 */
1100 return unop(Iop_1Uto32,
1101 binop(Iop_CmpEQ32,
1102 binop(Iop_Shl32,cc_dep1,mkU8(16)),
1103 mkU32(0)));
1104 }
1105
1106 /*---------------- INCW ----------------*/
1107
1108 if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
1109 /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
1110 /* inc W, then Z --> test dst == 0 */
1111 return unop(Iop_1Uto32,
1112 binop(Iop_CmpEQ32,
1113 binop(Iop_Shl32,cc_dep1,mkU8(16)),
1114 mkU32(0)));
1115 }
1116
1117 /*---------------- SHRL ----------------*/
1118
1119 if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
1120 /* SHRL, then Z --> test dep1 == 0 */
1121 return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1122 }
1123
1124 /*---------------- COPY ----------------*/
1125 /* This can happen, as a result of x87 FP compares: "fcom ... ;
1126 fnstsw %ax ; sahf ; jbe" for example. */
1127
1128 if (isU32(cc_op, X86G_CC_OP_COPY) &&
1129 (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
1130 /* COPY, then BE --> extract C and Z from dep1, and test
1131 (C or Z) == 1. */
1132 /* COPY, then NBE --> extract C and Z from dep1, and test
1133 (C or Z) == 0. */
1134 UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
1135 return
1136 unop(
1137 Iop_1Uto32,
1138 binop(
1139 Iop_CmpEQ32,
1140 binop(
1141 Iop_And32,
1142 binop(
1143 Iop_Or32,
1144 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1145 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
1146 ),
1147 mkU32(1)
1148 ),
1149 mkU32(nnn)
1150 )
1151 );
1152 }
1153
1154 if (isU32(cc_op, X86G_CC_OP_COPY)
1155 && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
1156 /* COPY, then B --> extract C from dep1, and test (C == 1). */
1157 /* COPY, then NB --> extract C from dep1, and test (C == 0). */
1158 UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
1159 return
1160 unop(
1161 Iop_1Uto32,
1162 binop(
1163 Iop_CmpEQ32,
1164 binop(
1165 Iop_And32,
1166 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1167 mkU32(1)
1168 ),
1169 mkU32(nnn)
1170 )
1171 );
1172 }
1173
1174 if (isU32(cc_op, X86G_CC_OP_COPY)
1175 && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
1176 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1177 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1178 UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
1179 return
1180 unop(
1181 Iop_1Uto32,
1182 binop(
1183 Iop_CmpEQ32,
1184 binop(
1185 Iop_And32,
1186 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
1187 mkU32(1)
1188 ),
1189 mkU32(nnn)
1190 )
1191 );
1192 }
1193
1194 if (isU32(cc_op, X86G_CC_OP_COPY)
1195 && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
1196 /* COPY, then P --> extract P from dep1, and test (P == 1). */
1197 /* COPY, then NP --> extract P from dep1, and test (P == 0). */
1198 UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
1199 return
1200 unop(
1201 Iop_1Uto32,
1202 binop(
1203 Iop_CmpEQ32,
1204 binop(
1205 Iop_And32,
1206 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
1207 mkU32(1)
1208 ),
1209 mkU32(nnn)
1210 )
1211 );
1212 }
1213
1214 return NULL;
1215 }
1216
1217 /* --------- specialising "x86g_calculate_eflags_c" --------- */
1218
1219 if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
1220 /* specialise calls to above "calculate_eflags_c" function */
1221 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1222 vassert(arity == 4);
1223 cc_op = args[0];
1224 cc_dep1 = args[1];
1225 cc_dep2 = args[2];
1226 cc_ndep = args[3];
1227
1228 if (isU32(cc_op, X86G_CC_OP_SUBL)) {
1229 /* C after sub denotes unsigned less than */
1230 return unop(Iop_1Uto32,
1231 binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
1232 }
1233 if (isU32(cc_op, X86G_CC_OP_SUBB)) {
1234 /* C after sub denotes unsigned less than */
1235 return unop(Iop_1Uto32,
1236 binop(Iop_CmpLT32U,
1237 binop(Iop_And32,cc_dep1,mkU32(0xFF)),
1238 binop(Iop_And32,cc_dep2,mkU32(0xFF))));
1239 }
1240 if (isU32(cc_op, X86G_CC_OP_LOGICL)
1241 || isU32(cc_op, X86G_CC_OP_LOGICW)
1242 || isU32(cc_op, X86G_CC_OP_LOGICB)) {
1243 /* cflag after logic is zero */
1244 return mkU32(0);
1245 }
1246 if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
1247 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1248 return cc_ndep;
1249 }
1250 if (isU32(cc_op, X86G_CC_OP_COPY)) {
1251 /* cflag after COPY is stored in DEP1. */
1252 return
1253 binop(
1254 Iop_And32,
1255 binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1256 mkU32(1)
1257 );
1258 }
1259 if (isU32(cc_op, X86G_CC_OP_ADDL)) {
1260 /* C after add denotes sum <u either arg */
1261 return unop(Iop_1Uto32,
1262 binop(Iop_CmpLT32U,
1263 binop(Iop_Add32, cc_dep1, cc_dep2),
1264 cc_dep1));
1265 }
1266 // ATC, requires verification, no test case known
1267 //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
1268 // /* C after signed widening multiply denotes the case where
1269 // the top half of the result isn't simply the sign extension
1270 // of the bottom half (iow the result doesn't fit completely
1271 // in the bottom half). Hence:
1272 // C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
1273 // where 'x' denotes signed widening multiply.*/
1274 // return
1275 // unop(Iop_1Uto32,
1276 // binop(Iop_CmpNE32,
1277 // unop(Iop_64HIto32,
1278 // binop(Iop_MullS32, cc_dep1, cc_dep2)),
1279 // binop(Iop_Sar32,
1280 // binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
1281 //}
1282 # if 0
1283 if (cc_op->tag == Iex_Const) {
1284 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1285 }
1286 # endif
1287
1288 return NULL;
1289 }
1290
1291 /* --------- specialising "x86g_calculate_eflags_all" --------- */
1292
1293 if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
1294 /* specialise calls to above "calculate_eflags_all" function */
1295 IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
1296 vassert(arity == 4);
1297 cc_op = args[0];
1298 cc_dep1 = args[1];
1299 /* cc_dep2 = args[2]; */
1300 /* cc_ndep = args[3]; */
1301
1302 if (isU32(cc_op, X86G_CC_OP_COPY)) {
1303 /* eflags after COPY are stored in DEP1. */
1304 return
1305 binop(
1306 Iop_And32,
1307 cc_dep1,
1308 mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
1309 | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
1310 );
1311 }
1312 return NULL;
1313 }
1314
1315 # undef unop
1316 # undef binop
1317 # undef mkU32
1318 # undef mkU8
1319
1320 return NULL;
1321 }
1322
1323
1324 /*---------------------------------------------------------------*/
1325 /*--- Supporting functions for x87 FPU activities. ---*/
1326 /*---------------------------------------------------------------*/
1327
host_is_little_endian(void)1328 static inline Bool host_is_little_endian ( void )
1329 {
1330 UInt x = 0x76543210;
1331 UChar* p = (UChar*)(&x);
1332 return toBool(*p == 0x10);
1333 }
1334
1335 /* 80 and 64-bit floating point formats:
1336
1337 80-bit:
1338
1339 S 0 0-------0 zero
1340 S 0 0X------X denormals
1341 S 1-7FFE 1X------X normals (all normals have leading 1)
1342 S 7FFF 10------0 infinity
1343 S 7FFF 10X-----X snan
1344 S 7FFF 11X-----X qnan
1345
1346 S is the sign bit. For runs X----X, at least one of the Xs must be
1347 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
1348 there is an explicitly represented leading 1, and a sign bit,
1349 giving 80 in total.
1350
1351 64-bit avoids the confusion of an explicitly represented leading 1
1352 and so is simpler:
1353
1354 S 0 0------0 zero
1355 S 0 X------X denormals
1356 S 1-7FE any normals
1357 S 7FF 0------0 infinity
1358 S 7FF 0X-----X snan
1359 S 7FF 1X-----X qnan
1360
1361 Exponent is 11 bits, fractional part is 52 bits, and there is a
1362 sign bit, giving 64 in total.
1363 */
1364
1365 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1366 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   /* Classify 'dbl' (an IEEE754 double, viewed byte-wise) as FXAM
      would, returning the C3..C0 condition bits.  'tag' is the vex
      register tag: 0 means the register is empty. */
   UChar* bytes;
   UChar  sign;
   Int    bexp;
   Bool   mantIsZero;
   UInt   c1;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   bytes = (UChar*)(&dbl);
   sign  = toUChar( (bytes[7] >> 7) & 1 );
   /* C1 always carries the sign bit, whatever the classification. */
   c1    = ((UInt)sign) << X86G_FC_SHIFT_C1;

   /* An empty register classifies as 1,0,sign,1, regardless of the
      bit pattern it holds. */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | c1 | X86G_FC_MASK_C0;
   }

   /* Biased exponent: top 11 bits after the sign. */
   bexp = ((bytes[7] << 4) | ((bytes[6] >> 4) & 0x0F)) & 0x7FF;

   mantIsZero
      = toBool( (bytes[6] & 0x0F) == 0
                && 0 == (bytes[5] | bytes[4] | bytes[3]
                         | bytes[2] | bytes[1] | bytes[0]) );

   if (bexp == 0) {
      if (mantIsZero) {
         /* Zero: 1,0,sign,0 */
         /* vex_printf("Zero\n"); */
         return X86G_FC_MASK_C3 | c1;
      }
      /* Denormal: 1,1,sign,0 */
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2 | c1;
   }

   if (bexp == 0x7FF) {
      if (mantIsZero) {
         /* Infinity: 0,1,sign,1 */
         /* vex_printf("Inf\n"); */
         return X86G_FC_MASK_C2 | c1 | X86G_FC_MASK_C0;
      }
      /* NaN: 0,0,sign,1 */
      /* vex_printf("NaN\n"); */
      return c1 | X86G_FC_MASK_C0;
   }

   /* Anything else is a normal finite number: 0,1,sign,0. */
   /* vex_printf("normal\n"); */
   return X86G_FC_MASK_C2 | c1;
}
1435
1436
1437 /* CALLED FROM GENERATED CODE */
1438 /* DIRTY HELPER (reads guest memory) */
x86g_dirtyhelper_loadF80le(UInt addrU)1439 ULong x86g_dirtyhelper_loadF80le ( UInt addrU )
1440 {
1441 ULong f64;
1442 convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
1443 return f64;
1444 }
1445
1446 /* CALLED FROM GENERATED CODE */
1447 /* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 )
{
   /* Widen the 64-bit double 'f64' to 80-bit x87 extended format,
      writing it directly to guest memory at 'addrU'. */
   UChar* dst = (UChar*)ULong_to_Ptr(addrU);
   convert_f64le_to_f80le( (UChar*)&f64, dst );
}
1452
1453
1454 /*----------------------------------------------*/
1455 /*--- The exported fns .. ---*/
1456 /*----------------------------------------------*/
1457
1458 /* Layout of the real x87 state. */
1459 /* 13 June 05: Fpu_State and auxiliary constants was moved to
1460 g_generic_x87.h */
1461
1462
1463 /* CLEAN HELPER */
1464 /* fpucw[15:0] contains a x87 native format FPU control word.
1465 Extract from it the required FPROUND value and any resulting
1466 emulation warning, and return (warn << 32) | fpround value.
1467 */
x86g_check_fldcw(UInt fpucw)1468 ULong x86g_check_fldcw ( UInt fpucw )
1469 {
1470 /* Decide on a rounding mode. fpucw[11:10] holds it. */
1471 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1472 UInt rmode = (fpucw >> 10) & 3;
1473
1474 /* Detect any required emulation warnings. */
1475 VexEmWarn ew = EmWarn_NONE;
1476
1477 if ((fpucw & 0x3F) != 0x3F) {
1478 /* unmasked exceptions! */
1479 ew = EmWarn_X86_x87exns;
1480 }
1481 else
1482 if (((fpucw >> 8) & 3) != 3) {
1483 /* unsupported precision */
1484 ew = EmWarn_X86_x87precision;
1485 }
1486
1487 return (((ULong)ew) << 32) | ((ULong)rmode);
1488 }
1489
1490 /* CLEAN HELPER */
1491 /* Given fpround as an IRRoundingMode value, create a suitable x87
1492 native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   /* All exceptions masked and 64-bit precision (0x037F), with the
      requested rounding mode installed in bits [11:10]. */
   UInt rm = fpround & 3;
   return 0x037F | (rm << 10);
}
1498
1499
1500 /* CLEAN HELPER */
1501 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1502 Extract from it the required SSEROUND value and any resulting
1503 emulation warning, and return (warn << 32) | sseround value.
1504 */
x86g_check_ldmxcsr(UInt mxcsr)1505 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1506 {
1507 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
1508 /* NOTE, encoded exactly as per enum IRRoundingMode. */
1509 UInt rmode = (mxcsr >> 13) & 3;
1510
1511 /* Detect any required emulation warnings. */
1512 VexEmWarn ew = EmWarn_NONE;
1513
1514 if ((mxcsr & 0x1F80) != 0x1F80) {
1515 /* unmasked exceptions! */
1516 ew = EmWarn_X86_sseExns;
1517 }
1518 else
1519 if (mxcsr & (1<<15)) {
1520 /* FZ is set */
1521 ew = EmWarn_X86_fz;
1522 }
1523 else
1524 if (mxcsr & (1<<6)) {
1525 /* DAZ is set */
1526 ew = EmWarn_X86_daz;
1527 }
1528
1529 return (((ULong)ew) << 32) | ((ULong)rmode);
1530 }
1531
1532
1533 /* CLEAN HELPER */
1534 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1535 native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   /* All exceptions masked (0x1F80), with the requested rounding
      mode installed in bits [14:13]. */
   UInt rm = sseround & 3;
   return 0x1F80 | (rm << 13);
}
1541
1542
1543 /* CALLED FROM GENERATED CODE */
1544 /* DIRTY HELPER (writes guest state) */
1545 /* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   /* Reset the guest x87 state to the 'finit' defaults: empty
      stack, zeroed registers, round-to-nearest, cleared C3..C0. */
   Int r;
   gst->guest_FTOP = 0;
   for (r = 0; r < 8; r++) {
      gst->guest_FPTAG[r] = 0; /* all slots empty */
      gst->guest_FPREG[r] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
1557
1558
1559 /* This is used to implement both 'frstor' and 'fldenv'. The latter
1560 appears to differ from the former only in that the 8 FP registers
1561 themselves are not transferred into the guest state. */
/* Copy an x87 image (14-word env, optionally followed by register
   data, as written by fsave/fstenv) at 'x87_state' into the guest
   state.  'moveRegs' selects whether the 8 FP registers are
   transferred too (frstor) or only the environment (fldenv).
   Returns an emulation warning derived from the control word
   (see x86g_check_fldcw). */
static
VexEmWarn do_put_x87 ( Bool moveRegs,
                       /*IN*/UChar* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int stno, preg;
   UInt tag;
   ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87 = (Fpu_State*)x87_state;
   UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;  /* TOP field of status word */
   UInt tagw = x87->env[FP_ENV_TAG];
   UInt fpucw = x87->env[FP_ENV_CTRL];
   UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;    /* C3,C2,C1,C0 bits */
   VexEmWarn ew;
   UInt fpround;
   ULong pair;

   /* Copy registers and tags.  The image holds registers in ST
      order; the guest state holds them in physical order, hence the
      (stno + ftop) & 7 mapping.  x87 tag value 3 means empty. */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty; narrow its 80-bit image to the
            64-bit double kept in the guest state */
         if (moveRegs)
            convert_f80le_to_f64le( &x87->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;          /* low half: rounding mode */
   ew = (VexEmWarn)(pair >> 32);  /* high half: warning, if any */

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
1619
1620
1621 /* Create an x87 FPU state from the guest state, as close as
1622 we can approximate it. */
1623 static
do_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1624 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1625 /*OUT*/UChar* x87_state )
1626 {
1627 Int i, stno, preg;
1628 UInt tagw;
1629 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1630 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1631 Fpu_State* x87 = (Fpu_State*)x87_state;
1632 UInt ftop = vex_state->guest_FTOP;
1633 UInt c3210 = vex_state->guest_FC3210;
1634
1635 for (i = 0; i < 14; i++)
1636 x87->env[i] = 0;
1637
1638 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1639 x87->env[FP_ENV_STAT]
1640 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1641 x87->env[FP_ENV_CTRL]
1642 = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1643
1644 /* Dump the register stack in ST order. */
1645 tagw = 0;
1646 for (stno = 0; stno < 8; stno++) {
1647 preg = (stno + ftop) & 7;
1648 if (vexTags[preg] == 0) {
1649 /* register is empty */
1650 tagw |= (3 << (2*preg));
1651 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1652 &x87->reg[10*stno] );
1653 } else {
1654 /* register is full. */
1655 tagw |= (0 << (2*preg));
1656 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1657 &x87->reg[10*stno] );
1658 }
1659 }
1660 x87->env[FP_ENV_TAG] = toUShort(tagw);
1661 }
1662
1663
1664 /* CALLED FROM GENERATED CODE */
1665 /* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple: first build a
      classic x87 image with do_get_x87, then rearrange it into the
      fxsave memory layout at 'addr'. */
   Fpu_State tmp;
   UShort* addrS = (UShort*)addr;
   UChar* addrC = (UChar*)addr;
   U128* xmm = (U128*)(addr + 160);  /* %xmm0..7 live at image offset 160 */
   UInt mxcsr;
   UShort fp_tags;
   UInt summary_tags;
   Int r, stno;
   UShort *srcS, *dstS;

   do_get_x87( gst, (UChar*)&tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way.  The fxsave image
      carries an abridged tag byte: bit r is set iff register r is
      in use (its full 2-bit tag is not 3/empty). */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5] = 0; /* pad */

   addrS[3] = 0; /* FOP: fpu opcode (bogus) */
   addrS[4] = 0;
   addrS[5] = 0; /* FPU IP (bogus) */
   addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
                    could conceivably dump %CS here) */

   addrS[7] = 0; /* Intel reserved */

   addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr); /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order.  Each register gets a
      16-byte slot in the image, of which only the first 10 bytes
      are significant; the rest are zeroed. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
1753
1754
1755 /* CALLED FROM GENERATED CODE */
1756 /* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   /* Inverse of FXSAVE above: unpack an fxsave-format image at
      'addr' into the guest state, returning an emulation warning
      if the control words request something we cannot simulate. */
   Fpu_State tmp;
   VexEmWarn warnX87 = EmWarn_NONE;
   VexEmWarn warnXMM = EmWarn_NONE;
   UShort* addrS = (UShort*)addr;
   UChar* addrC = (UChar*)addr;
   U128* xmm = (U128*)(addr + 160);  /* %xmm0..7 live at image offset 160 */
   UShort fp_tags;
   Int r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7]: only the first 10 bytes of each 16-byte
      image slot are significant. */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Expand the abridged FTW byte back into a full 2-bit-per-register
      tag word: a set summary bit means the register is in use (tag 0,
      "valid" -- not really precise enough), a clear bit means empty
      (tag 3).  This mirrors the summary computed in FXSAVE above. */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* in use / VALID */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );

   /* Reassemble MXCSR from its two 16-bit halves and install the
      SSE rounding mode, noting any emulation warning it implies. */
   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmWarn)(w64 >> 32);

     gst->guest_SSEROUND = (UInt)w64;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmWarn_NONE)
      return warnX87;
   else
      return warnXMM;
}
1846
1847
1848 /* CALLED FROM GENERATED CODE */
1849 /* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* An fsave image is exactly the Fpu_State layout that do_get_x87
      produces, so it can be written straight to guest memory. */
   UChar* image = (UChar*)addr;
   do_get_x87( gst, image );
}
1854
1855 /* CALLED FROM GENERATED CODE */
1856 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FRSTOR(VexGuestX86State * gst,HWord addr)1857 VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1858 {
1859 return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1860 }
1861
1862 /* CALLED FROM GENERATED CODE */
1863 /* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Build a complete x87 image, then copy out only its 14-word
      environment part -- fstenv does not store the registers. */
   Fpu_State tmp;
   UShort* dst = (UShort*)addr;
   Int w;
   do_get_x87( gst, (UChar*)&tmp );
   for (w = 0; w < 14; w++)
      dst[w] = tmp.env[w];
}
1874
1875 /* CALLED FROM GENERATED CODE */
1876 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FLDENV(VexGuestX86State * gst,HWord addr)1877 VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1878 {
1879 return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1880 }
1881
1882
1883 /*---------------------------------------------------------------*/
1884 /*--- Misc integer helpers, including rotates and CPUID. ---*/
1885 /*---------------------------------------------------------------*/
1886
1887 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1888 /* Calculate both flags and value result for rotate right
1889 through the carry bit. Result in low 32 bits,
1890 new flags (OSZACP) in high 32 bits.
1891 */
x86g_calculate_RCR(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)1892 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1893 {
1894 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1895
1896 switch (sz) {
1897 case 4:
1898 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1899 of = ((arg >> 31) ^ cf) & 1;
1900 while (tempCOUNT > 0) {
1901 tempcf = arg & 1;
1902 arg = (arg >> 1) | (cf << 31);
1903 cf = tempcf;
1904 tempCOUNT--;
1905 }
1906 break;
1907 case 2:
1908 while (tempCOUNT >= 17) tempCOUNT -= 17;
1909 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1910 of = ((arg >> 15) ^ cf) & 1;
1911 while (tempCOUNT > 0) {
1912 tempcf = arg & 1;
1913 arg = ((arg >> 1) & 0x7FFF) | (cf << 15);
1914 cf = tempcf;
1915 tempCOUNT--;
1916 }
1917 break;
1918 case 1:
1919 while (tempCOUNT >= 9) tempCOUNT -= 9;
1920 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1921 of = ((arg >> 7) ^ cf) & 1;
1922 while (tempCOUNT > 0) {
1923 tempcf = arg & 1;
1924 arg = ((arg >> 1) & 0x7F) | (cf << 7);
1925 cf = tempcf;
1926 tempCOUNT--;
1927 }
1928 break;
1929 default:
1930 vpanic("calculate_RCR: invalid size");
1931 }
1932
1933 cf &= 1;
1934 of &= 1;
1935 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
1936 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
1937
1938 return (((ULong)eflags_in) << 32) | ((ULong)arg);
1939 }
1940
1941
1942 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1943 /* Calculate both flags and value result for rotate left
1944 through the carry bit. Result in low 32 bits,
1945 new flags (OSZACP) in high 32 bits.
1946 */
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   /* Rotate 'arg' left through the carry bit, one step at a time.
      tempCOUNT: remaining rotate steps; cf: current carry flag;
      tempcf: bit rotated out on this step; of: overflow flag. */
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;       /* MSB becomes the new CF */
            arg = (arg << 1) | (cf & 1);    /* old CF enters at the LSB */
            cf = tempcf;
            tempCOUNT--;
         }
         /* unlike RCR, OF is taken from the post-rotate MSB vs CF */
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         /* 16-bit RCL rotates through a 17-bit quantity (reg + CF),
            so the count is effectively mod 17 */
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg = 0xFFFF & ((arg << 1) | (cf & 1));
            cf = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         /* 8-bit RCL: a 9-bit rotate, count mod 9 */
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg = 0xFF & ((arg << 1) | (cf & 1));
            cf = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   /* Merge the new C and O flags into the incoming eflags. */
   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   /* result value in the low half, updated flags in the high half */
   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
1995
1996
1997 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1998 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
1999 AX value in low half of arg, OSZACP in upper half.
2000 See guest-x86/toIR.c usage point for details.
2001 */
calc_parity_8bit(UInt w32)2002 static UInt calc_parity_8bit ( UInt w32 ) {
2003 UInt i;
2004 UInt p = 1;
2005 for (i = 0; i < 8; i++)
2006 p ^= (1 & (w32 >> i));
2007 return p;
2008 }
x86g_calculate_daa_das_aaa_aas(UInt flags_and_AX,UInt opcode)2009 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2010 {
2011 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2012 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2013 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2014 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2015 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2016 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2017 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2018 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2019 UInt result = 0;
2020
2021 switch (opcode) {
2022 case 0x27: { /* DAA */
2023 UInt old_AL = r_AL;
2024 UInt old_C = r_C;
2025 r_C = 0;
2026 if ((r_AL & 0xF) > 9 || r_A == 1) {
2027 r_AL = r_AL + 6;
2028 r_C = old_C;
2029 if (r_AL >= 0x100) r_C = 1;
2030 r_A = 1;
2031 } else {
2032 r_A = 0;
2033 }
2034 if (old_AL > 0x99 || old_C == 1) {
2035 r_AL = r_AL + 0x60;
2036 r_C = 1;
2037 } else {
2038 r_C = 0;
2039 }
2040 /* O is undefined. S Z and P are set according to the
2041 result. */
2042 r_AL &= 0xFF;
2043 r_O = 0; /* let's say */
2044 r_S = (r_AL & 0x80) ? 1 : 0;
2045 r_Z = (r_AL == 0) ? 1 : 0;
2046 r_P = calc_parity_8bit( r_AL );
2047 break;
2048 }
2049 case 0x2F: { /* DAS */
2050 UInt old_AL = r_AL;
2051 UInt old_C = r_C;
2052 r_C = 0;
2053 if ((r_AL & 0xF) > 9 || r_A == 1) {
2054 Bool borrow = r_AL < 6;
2055 r_AL = r_AL - 6;
2056 r_C = old_C;
2057 if (borrow) r_C = 1;
2058 r_A = 1;
2059 } else {
2060 r_A = 0;
2061 }
2062 if (old_AL > 0x99 || old_C == 1) {
2063 r_AL = r_AL - 0x60;
2064 r_C = 1;
2065 } else {
2066 /* Intel docs are wrong: r_C = 0; */
2067 }
2068 /* O is undefined. S Z and P are set according to the
2069 result. */
2070 r_AL &= 0xFF;
2071 r_O = 0; /* let's say */
2072 r_S = (r_AL & 0x80) ? 1 : 0;
2073 r_Z = (r_AL == 0) ? 1 : 0;
2074 r_P = calc_parity_8bit( r_AL );
2075 break;
2076 }
2077 case 0x37: { /* AAA */
2078 Bool nudge = r_AL > 0xF9;
2079 if ((r_AL & 0xF) > 9 || r_A == 1) {
2080 r_AL = r_AL + 6;
2081 r_AH = r_AH + 1 + (nudge ? 1 : 0);
2082 r_A = 1;
2083 r_C = 1;
2084 r_AL = r_AL & 0xF;
2085 } else {
2086 r_A = 0;
2087 r_C = 0;
2088 r_AL = r_AL & 0xF;
2089 }
2090 /* O S Z and P are undefined. */
2091 r_O = r_S = r_Z = r_P = 0; /* let's say */
2092 break;
2093 }
2094 case 0x3F: { /* AAS */
2095 Bool nudge = r_AL < 0x06;
2096 if ((r_AL & 0xF) > 9 || r_A == 1) {
2097 r_AL = r_AL - 6;
2098 r_AH = r_AH - 1 - (nudge ? 1 : 0);
2099 r_A = 1;
2100 r_C = 1;
2101 r_AL = r_AL & 0xF;
2102 } else {
2103 r_A = 0;
2104 r_C = 0;
2105 r_AL = r_AL & 0xF;
2106 }
2107 /* O S Z and P are undefined. */
2108 r_O = r_S = r_Z = r_P = 0; /* let's say */
2109 break;
2110 }
2111 default:
2112 vassert(0);
2113 }
2114 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2115 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2116 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2117 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2118 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2119 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2120 | ( (r_AH & 0xFF) << 8 )
2121 | ( (r_AL & 0xFF) << 0 );
2122 return result;
2123 }
2124
x86g_calculate_aad_aam(UInt flags_and_AX,UInt opcode)2125 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2126 {
2127 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2128 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2129 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2130 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2131 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2132 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2133 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2134 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2135 UInt result = 0;
2136
2137 switch (opcode) {
2138 case 0xD4: { /* AAM */
2139 r_AH = r_AL / 10;
2140 r_AL = r_AL % 10;
2141 break;
2142 }
2143 case 0xD5: { /* AAD */
2144 r_AL = ((r_AH * 10) + r_AL) & 0xff;
2145 r_AH = 0;
2146 break;
2147 }
2148 default:
2149 vassert(0);
2150 }
2151
2152 r_O = 0; /* let's say (undefined) */
2153 r_C = 0; /* let's say (undefined) */
2154 r_A = 0; /* let's say (undefined) */
2155 r_S = (r_AL & 0x80) ? 1 : 0;
2156 r_Z = (r_AL == 0) ? 1 : 0;
2157 r_P = calc_parity_8bit( r_AL );
2158
2159 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2160 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2161 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2162 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2163 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2164 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2165 | ( (r_AH & 0xFF) << 8 )
2166 | ( (r_AL & 0xFF) << 0 );
2167 return result;
2168 }
2169
2170
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Read the host time-stamp counter.  Horrible hack: on non-x86
   hosts there is no TSC to read, so just return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
# if defined(__i386__)
   ULong res;
   /* "=A" binds the 64-bit result to the EDX:EAX register pair,
      which is exactly where rdtsc leaves its value. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
# else
   return 1ULL;
# endif
}
2184
2185
2186 /* CALLED FROM GENERATED CODE */
2187 /* DIRTY HELPER (modifies guest state) */
2188 /* Claim to be a P55C (Intel Pentium/MMX) */
x86g_dirtyhelper_CPUID_sse0(VexGuestX86State * st)2189 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2190 {
2191 switch (st->guest_EAX) {
2192 case 0:
2193 st->guest_EAX = 0x1;
2194 st->guest_EBX = 0x756e6547;
2195 st->guest_ECX = 0x6c65746e;
2196 st->guest_EDX = 0x49656e69;
2197 break;
2198 default:
2199 st->guest_EAX = 0x543;
2200 st->guest_EBX = 0x0;
2201 st->guest_ECX = 0x0;
2202 st->guest_EDX = 0x8001bf;
2203 break;
2204 }
2205 }
2206
2207 /* CALLED FROM GENERATED CODE */
2208 /* DIRTY HELPER (modifies guest state) */
2209 /* Claim to be the following SSE1-capable CPU:
2210 vendor_id : GenuineIntel
2211 cpu family : 6
2212 model : 11
2213 model name : Intel(R) Pentium(R) III CPU family 1133MHz
2214 stepping : 1
2215 cpu MHz : 1131.013
2216 cache size : 512 KB
2217 */
x86g_dirtyhelper_CPUID_sse1(VexGuestX86State * st)2218 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2219 {
2220 switch (st->guest_EAX) {
2221 case 0:
2222 st->guest_EAX = 0x00000002;
2223 st->guest_EBX = 0x756e6547;
2224 st->guest_ECX = 0x6c65746e;
2225 st->guest_EDX = 0x49656e69;
2226 break;
2227 case 1:
2228 st->guest_EAX = 0x000006b1;
2229 st->guest_EBX = 0x00000004;
2230 st->guest_ECX = 0x00000000;
2231 st->guest_EDX = 0x0383fbff;
2232 break;
2233 default:
2234 st->guest_EAX = 0x03020101;
2235 st->guest_EBX = 0x00000000;
2236 st->guest_ECX = 0x00000000;
2237 st->guest_EDX = 0x0c040883;
2238 break;
2239 }
2240 }
2241
2242 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2243 vendor_id : GenuineIntel
2244 cpu family : 6
2245 model : 15
2246 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2247 stepping : 6
2248 cpu MHz : 2394.000
2249 cache size : 4096 KB
2250 physical id : 0
2251 siblings : 2
2252 core id : 0
2253 cpu cores : 2
2254 fpu : yes
2255 fpu_exception : yes
2256 cpuid level : 10
2257 wp : yes
2258 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2259 mtrr pge mca cmov pat pse36 clflush dts acpi
2260 mmx fxsr sse sse2 ss ht tm syscall nx lm
2261 constant_tsc pni monitor ds_cpl vmx est tm2
2262 cx16 xtpr lahf_lm
2263 bogomips : 4798.78
2264 clflush size : 64
2265 cache_alignment : 64
2266 address sizes : 36 bits physical, 48 bits virtual
2267 power management:
2268 */
x86g_dirtyhelper_CPUID_sse2(VexGuestX86State * st)2269 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2270 {
2271 # define SET_ABCD(_a,_b,_c,_d) \
2272 do { st->guest_EAX = (UInt)(_a); \
2273 st->guest_EBX = (UInt)(_b); \
2274 st->guest_ECX = (UInt)(_c); \
2275 st->guest_EDX = (UInt)(_d); \
2276 } while (0)
2277
2278 switch (st->guest_EAX) {
2279 case 0x00000000:
2280 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2281 break;
2282 case 0x00000001:
2283 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2284 break;
2285 case 0x00000002:
2286 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2287 break;
2288 case 0x00000003:
2289 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2290 break;
2291 case 0x00000004: {
2292 switch (st->guest_ECX) {
2293 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2294 0x0000003f, 0x00000001); break;
2295 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2296 0x0000003f, 0x00000001); break;
2297 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2298 0x00000fff, 0x00000001); break;
2299 default: SET_ABCD(0x00000000, 0x00000000,
2300 0x00000000, 0x00000000); break;
2301 }
2302 break;
2303 }
2304 case 0x00000005:
2305 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2306 break;
2307 case 0x00000006:
2308 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2309 break;
2310 case 0x00000007:
2311 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2312 break;
2313 case 0x00000008:
2314 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2315 break;
2316 case 0x00000009:
2317 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2318 break;
2319 case 0x0000000a:
2320 unhandled_eax_value:
2321 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2322 break;
2323 case 0x80000000:
2324 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2325 break;
2326 case 0x80000001:
2327 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2328 break;
2329 case 0x80000002:
2330 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2331 break;
2332 case 0x80000003:
2333 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2334 break;
2335 case 0x80000004:
2336 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2337 break;
2338 case 0x80000005:
2339 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2340 break;
2341 case 0x80000006:
2342 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2343 break;
2344 case 0x80000007:
2345 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2346 break;
2347 case 0x80000008:
2348 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2349 break;
2350 default:
2351 goto unhandled_eax_value;
2352 }
2353 # undef SET_ABCD
2354 }
2355
2356
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Execute a real IN instruction of width sz from I/O port portno.
   Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;   /* port numbers are 16 bits wide */
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         /* EAX is cleared first so the 16-bit read leaves the upper
            bytes of the result zero. */
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         /* Likewise for the 8-bit read. */
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;   /* invalid size: return 0 */
   }
   return r;
# else
   return 0;
# endif
}
2386
2387
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Execute a real OUT instruction, writing the low sz bytes of data
   to I/O port portno.  Horrible hack.  On non-x86 platforms, do
   nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
# if defined(__i386__)
   portno &= 0xFFFF;   /* port numbers are 16 bits wide */
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;   /* invalid size: silently ignore */
   }
# else
   /* do nothing */
# endif
}
2415
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, store six zero bytes. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
# if defined(__i386__)
   switch (op) {
      case 0:
         /* Store the host GDTR (6 bytes: 2-byte limit, 4-byte base)
            at *address. */
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         /* Likewise for the IDTR. */
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
# else
   /* Not an x86 host: fake the result by writing the six bytes both
      instructions would have stored, as zeroes. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
# endif
}
2440
2441 /*---------------------------------------------------------------*/
2442 /*--- Helpers for MMX/SSE/SSE2. ---*/
2443 /*---------------------------------------------------------------*/
2444
abdU8(UChar xx,UChar yy)2445 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2446 return toUChar(xx>yy ? xx-yy : yy-xx);
2447 }
2448
mk32x2(UInt w1,UInt w0)2449 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2450 return (((ULong)w1) << 32) | ((ULong)w0);
2451 }
2452
sel16x4_3(ULong w64)2453 static inline UShort sel16x4_3 ( ULong w64 ) {
2454 UInt hi32 = toUInt(w64 >> 32);
2455 return toUShort(hi32 >> 16);
2456 }
sel16x4_2(ULong w64)2457 static inline UShort sel16x4_2 ( ULong w64 ) {
2458 UInt hi32 = toUInt(w64 >> 32);
2459 return toUShort(hi32);
2460 }
sel16x4_1(ULong w64)2461 static inline UShort sel16x4_1 ( ULong w64 ) {
2462 UInt lo32 = toUInt(w64);
2463 return toUShort(lo32 >> 16);
2464 }
sel16x4_0(ULong w64)2465 static inline UShort sel16x4_0 ( ULong w64 ) {
2466 UInt lo32 = toUInt(w64);
2467 return toUShort(lo32);
2468 }
2469
sel8x8_7(ULong w64)2470 static inline UChar sel8x8_7 ( ULong w64 ) {
2471 UInt hi32 = toUInt(w64 >> 32);
2472 return toUChar(hi32 >> 24);
2473 }
sel8x8_6(ULong w64)2474 static inline UChar sel8x8_6 ( ULong w64 ) {
2475 UInt hi32 = toUInt(w64 >> 32);
2476 return toUChar(hi32 >> 16);
2477 }
sel8x8_5(ULong w64)2478 static inline UChar sel8x8_5 ( ULong w64 ) {
2479 UInt hi32 = toUInt(w64 >> 32);
2480 return toUChar(hi32 >> 8);
2481 }
sel8x8_4(ULong w64)2482 static inline UChar sel8x8_4 ( ULong w64 ) {
2483 UInt hi32 = toUInt(w64 >> 32);
2484 return toUChar(hi32 >> 0);
2485 }
sel8x8_3(ULong w64)2486 static inline UChar sel8x8_3 ( ULong w64 ) {
2487 UInt lo32 = toUInt(w64);
2488 return toUChar(lo32 >> 24);
2489 }
sel8x8_2(ULong w64)2490 static inline UChar sel8x8_2 ( ULong w64 ) {
2491 UInt lo32 = toUInt(w64);
2492 return toUChar(lo32 >> 16);
2493 }
sel8x8_1(ULong w64)2494 static inline UChar sel8x8_1 ( ULong w64 ) {
2495 UInt lo32 = toUInt(w64);
2496 return toUChar(lo32 >> 8);
2497 }
sel8x8_0(ULong w64)2498 static inline UChar sel8x8_0 ( ULong w64 ) {
2499 UInt lo32 = toUInt(w64);
2500 return toUChar(lo32 >> 0);
2501 }
2502
2503 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_mmx_pmaddwd(ULong xx,ULong yy)2504 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2505 {
2506 return
2507 mk32x2(
2508 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2509 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2510 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2511 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2512 );
2513 }
2514
2515 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_mmx_pmovmskb(ULong xx)2516 UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
2517 {
2518 UInt r = 0;
2519 if (xx & (1ULL << (64-1))) r |= (1<<7);
2520 if (xx & (1ULL << (56-1))) r |= (1<<6);
2521 if (xx & (1ULL << (48-1))) r |= (1<<5);
2522 if (xx & (1ULL << (40-1))) r |= (1<<4);
2523 if (xx & (1ULL << (32-1))) r |= (1<<3);
2524 if (xx & (1ULL << (24-1))) r |= (1<<2);
2525 if (xx & (1ULL << (16-1))) r |= (1<<1);
2526 if (xx & (1ULL << ( 8-1))) r |= (1<<0);
2527 return r;
2528 }
2529
2530 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_mmx_psadbw(ULong xx,ULong yy)2531 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2532 {
2533 UInt t = 0;
2534 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2535 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2536 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2537 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2538 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2539 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2540 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2541 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2542 t &= 0xFFFF;
2543 return (ULong)t;
2544 }
2545
2546 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_sse_pmovmskb(ULong w64hi,ULong w64lo)2547 UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
2548 {
2549 UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
2550 UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
2551 return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
2552 }
2553
2554
2555 /*---------------------------------------------------------------*/
2556 /*--- Helpers for dealing with segment overrides. ---*/
2557 /*---------------------------------------------------------------*/
2558
2559 static inline
get_segdescr_base(VexGuestX86SegDescr * ent)2560 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2561 {
2562 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2563 UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2564 UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2565 return (hi << 24) | (mid << 16) | lo;
2566 }
2567
2568 static inline
get_segdescr_limit(VexGuestX86SegDescr * ent)2569 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2570 {
2571 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2572 UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2573 UInt limit = (hi << 16) | lo;
2574 if (ent->LdtEnt.Bits.Granularity)
2575 limit = (limit << 12) | 0xFFF;
2576 return limit;
2577 }
2578
2579 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_use_seg_selector(HWord ldt,HWord gdt,UInt seg_selector,UInt virtual_addr)2580 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2581 UInt seg_selector, UInt virtual_addr )
2582 {
2583 UInt tiBit, base, limit;
2584 VexGuestX86SegDescr* the_descrs;
2585
2586 Bool verboze = False;
2587
2588 /* If this isn't true, we're in Big Trouble. */
2589 vassert(8 == sizeof(VexGuestX86SegDescr));
2590
2591 if (verboze)
2592 vex_printf("x86h_use_seg_selector: "
2593 "seg_selector = 0x%x, vaddr = 0x%x\n",
2594 seg_selector, virtual_addr);
2595
2596 /* Check for wildly invalid selector. */
2597 if (seg_selector & ~0xFFFF)
2598 goto bad;
2599
2600 seg_selector &= 0x0000FFFF;
2601
2602 /* Sanity check the segment selector. Ensure that RPL=11b (least
2603 privilege). This forms the bottom 2 bits of the selector. */
2604 if ((seg_selector & 3) != 3)
2605 goto bad;
2606
2607 /* Extract the TI bit (0 means GDT, 1 means LDT) */
2608 tiBit = (seg_selector >> 2) & 1;
2609
2610 /* Convert the segment selector onto a table index */
2611 seg_selector >>= 3;
2612 vassert(seg_selector >= 0 && seg_selector < 8192);
2613
2614 if (tiBit == 0) {
2615
2616 /* GDT access. */
2617 /* Do we actually have a GDT to look at? */
2618 if (gdt == 0)
2619 goto bad;
2620
2621 /* Check for access to non-existent entry. */
2622 if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2623 goto bad;
2624
2625 the_descrs = (VexGuestX86SegDescr*)gdt;
2626 base = get_segdescr_base (&the_descrs[seg_selector]);
2627 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2628
2629 } else {
2630
2631 /* All the same stuff, except for the LDT. */
2632 if (ldt == 0)
2633 goto bad;
2634
2635 if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2636 goto bad;
2637
2638 the_descrs = (VexGuestX86SegDescr*)ldt;
2639 base = get_segdescr_base (&the_descrs[seg_selector]);
2640 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2641
2642 }
2643
2644 /* Do the limit check. Note, this check is just slightly too
2645 slack. Really it should be "if (virtual_addr + size - 1 >=
2646 limit)," but we don't have the size info to hand. Getting it
2647 could be significantly complex. */
2648 if (virtual_addr >= limit)
2649 goto bad;
2650
2651 if (verboze)
2652 vex_printf("x86h_use_seg_selector: "
2653 "base = 0x%x, addr = 0x%x\n",
2654 base, base + virtual_addr);
2655
2656 /* High 32 bits are zero, indicating success. */
2657 return (ULong)( ((UInt)virtual_addr) + base );
2658
2659 bad:
2660 return 1ULL << 32;
2661 }
2662
2663
2664 /*---------------------------------------------------------------*/
2665 /*--- Helpers for dealing with, and describing, ---*/
2666 /*--- guest state as a whole. ---*/
2667 /*---------------------------------------------------------------*/
2668
2669 /* Initialise the entire x86 guest state. */
2670 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_initialise(VexGuestX86State * vex_state)2671 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2672 {
2673 vex_state->host_EvC_FAILADDR = 0;
2674 vex_state->host_EvC_COUNTER = 0;
2675
2676 vex_state->guest_EAX = 0;
2677 vex_state->guest_ECX = 0;
2678 vex_state->guest_EDX = 0;
2679 vex_state->guest_EBX = 0;
2680 vex_state->guest_ESP = 0;
2681 vex_state->guest_EBP = 0;
2682 vex_state->guest_ESI = 0;
2683 vex_state->guest_EDI = 0;
2684
2685 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
2686 vex_state->guest_CC_DEP1 = 0;
2687 vex_state->guest_CC_DEP2 = 0;
2688 vex_state->guest_CC_NDEP = 0;
2689 vex_state->guest_DFLAG = 1; /* forwards */
2690 vex_state->guest_IDFLAG = 0;
2691 vex_state->guest_ACFLAG = 0;
2692
2693 vex_state->guest_EIP = 0;
2694
2695 /* Initialise the simulated FPU */
2696 x86g_dirtyhelper_FINIT( vex_state );
2697
2698 /* Initialse the SSE state. */
2699 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2700
2701 vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2702 SSEZERO(vex_state->guest_XMM0);
2703 SSEZERO(vex_state->guest_XMM1);
2704 SSEZERO(vex_state->guest_XMM2);
2705 SSEZERO(vex_state->guest_XMM3);
2706 SSEZERO(vex_state->guest_XMM4);
2707 SSEZERO(vex_state->guest_XMM5);
2708 SSEZERO(vex_state->guest_XMM6);
2709 SSEZERO(vex_state->guest_XMM7);
2710
2711 # undef SSEZERO
2712
2713 vex_state->guest_CS = 0;
2714 vex_state->guest_DS = 0;
2715 vex_state->guest_ES = 0;
2716 vex_state->guest_FS = 0;
2717 vex_state->guest_GS = 0;
2718 vex_state->guest_SS = 0;
2719 vex_state->guest_LDT = 0;
2720 vex_state->guest_GDT = 0;
2721
2722 vex_state->guest_EMWARN = EmWarn_NONE;
2723
2724 /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2725 vex_state->guest_TISTART = 0;
2726 vex_state->guest_TILEN = 0;
2727
2728 vex_state->guest_NRADDR = 0;
2729 vex_state->guest_SC_CLASS = 0;
2730 vex_state->guest_IP_AT_SYSCALL = 0;
2731
2732 Int i;
2733 for (i = 0; i < sizeof(vex_state->padding)
2734 / sizeof(vex_state->padding[0]); i++) {
2735 vex_state->padding[i] = 0;
2736 }
2737 }
2738
2739
2740 /* Figure out if any part of the guest state contained in minoff
2741 .. maxoff requires precise memory exceptions. If in doubt return
2742 True (but this is generates significantly slower code).
2743
2744 By default we enforce precise exns for guest %ESP, %EBP and %EIP
2745 only. These are the minimum needed to extract correct stack
2746 backtraces from x86 code.
2747 */
guest_x86_state_requires_precise_mem_exns(Int minoff,Int maxoff)2748 Bool guest_x86_state_requires_precise_mem_exns ( Int minoff,
2749 Int maxoff)
2750 {
2751 Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2752 Int ebp_max = ebp_min + 4 - 1;
2753 Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2754 Int esp_max = esp_min + 4 - 1;
2755 Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2756 Int eip_max = eip_min + 4 - 1;
2757
2758 if (maxoff < ebp_min || minoff > ebp_max) {
2759 /* no overlap with ebp */
2760 } else {
2761 return True;
2762 }
2763
2764 if (maxoff < esp_min || minoff > esp_max) {
2765 /* no overlap with esp */
2766 } else {
2767 return True;
2768 }
2769
2770 if (maxoff < eip_min || minoff > eip_max) {
2771 /* no overlap with eip */
2772 } else {
2773 return True;
2774 }
2775
2776 return False;
2777 }
2778
2779
/* Build one { guest-state-offset, size } entry for the alwaysDefd
   table below, naming a field of VexGuestX86State. */
#define ALWAYSDEFD(field)          \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

/* Describes the x86 guest state layout to the rest of the system
   (sizes/offsets of SP, FP, IP, and which areas Memcheck may treat
   as always defined). */
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMWARN),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_TISTART),
                 /* 21 */ ALWAYSDEFD(guest_TILEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
2836
2837
2838 /*---------------------------------------------------------------*/
2839 /*--- end guest_x86_helpers.c ---*/
2840 /*---------------------------------------------------------------*/
2841