/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.  Calls to
   these functions are generated by the back end.  These calls are of
   course in the host machine code and this file will be compiled to
   host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
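
/* The table above encodes x86 PF directly: entry i is X86G_CC_MASK_P
   iff byte value i contains an even number of 1 bits.  A table with
   the same contents could be generated by something like the
   following sketch (illustrative only, not part of the build; the
   popcount loop is an assumption, not how the table was originally
   produced): */
#if 0
static void gen_parity_table ( UChar* table /* 256 entries */ )
{
   Int i, j, nbits;
   for (i = 0; i < 256; i++) {
      nbits = 0;
      for (j = 0; j < 8; j++)
         nbits += (i >> j) & 1;
      /* PF is set when the low byte of a result has even parity. */
      table[i] = (nbits % 2 == 0) ? X86G_CC_MASK_P : 0;
   }
}
#endif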

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}
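
/* For example, lshift(res, 8 - DATA_BITS) & 0x80 places the sign bit
   of a DATA_BITS-wide result at bit 7 (the EFLAGS SF position): for
   DATA_BITS==32 that is a right shift by 24, and for DATA_BITS==8 a
   shift by zero.  The negative-count case is an arithmetic right
   shift on a signed Int, which is fine here since callers mask the
   result anyway. */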

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
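
/* A note on the magic constants in the ACTIONS_* macros below: the
   X86G_CC_MASK_* values mirror the real EFLAGS bit positions, so CF
   is bit 0, PF bit 2, AF bit 4 (hence the 0x10), ZF bit 6 (hence the
   "<< 6"), SF bit 7 (hence the 0x80) and OF bit 11 (hence the
   "<< 11").  This lets the helpers assemble their return value with
   plain ORs. */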

/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
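
/* Worked example (8-bit): for CC_DEP1 = 0xFF and CC_DEP2 = 0x01,
   res = 0x100, so CF=1 (the byte-narrowed result 0x00 is below argL),
   PF=1 (byte 0x00 has even parity), AF=1, ZF=1, SF=0 and OF=0,
   giving a return value of 0x55. */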

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11)));         \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU,          \
                     DATA_U2TYPE, NARROWto2U)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS,          \
                     DATA_S2TYPE, NARROWto2S)                   \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
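
/* Worked example (8-bit UMUL): for CC_DEP1 = CC_DEP2 = 16, the full
   product rr = 256, so lo = 0x00 and hi = 0x01.  Hence CF=1, PF=1,
   ZF=1, SF=0 and OF=CF=1, giving a return value of 0x845. */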


#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
              " S NS P NP L NL LE NLE\n");
   vex_printf(" -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */


/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not called directly from generated code; the
   clean-helper wrappers below are. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB( 8,  UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB( 8,  UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8,  UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC( 8,  UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC( 8,  UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL( 8,  UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR( 8,  UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL( 8,  UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR( 8,  UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL( 8,  UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL( 8,  Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;
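   /* Odd-numbered condcodes are the negations of the even-numbered
      ones that precede them (X86CondNZ == X86CondZ + 1, and so on),
      so bit 0 of 'cond' tells us whether to invert the tested
      condition; that is what the 'inv ^ ...' terms below do. */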

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
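
/* An illustrative (hypothetical, not part of the build) client-side
   use of the three functions above: read the guest's eflags, flip
   the carry bit, and hand it back.  'example_flip_carry' is made up
   for illustration only. */
#if 0
static void example_flip_carry ( VexGuestX86State* st )
{
   UInt ef = LibVEX_GuestX86_get_eflags(st);
   LibVEX_GuestX86_put_eflag_c( (ef & X86G_CC_MASK_C) ? 0 : 1, st );
}
#endif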


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare dst <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}


/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
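
/* For example, 1.0 as a 64-bit double is 0x3FF0000000000000: sign 0,
   biased exponent 0x3FF, zero fraction -- a normal number in the
   scheme above. */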

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}


/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */


/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
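
/* Note the round-trip property: for any rounding mode r in 0..3,
   x86g_check_fldcw(x86g_create_fpucw(r)) yields r in the low word and
   EmNote_NONE in the high word, since 0x037F masks all exceptions and
   selects 64-bit precision. */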


/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}


/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
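
/* The same round-trip property holds here: x86g_check_ldmxcsr applied
   to x86g_create_mxcsr(r) recovers rounding mode r with no warning,
   since 0x1F80 masks all SSE exceptions and leaves FZ and DAZ clear. */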


/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}


/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87_state->env[FP_ENV_TAG];
   UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
   UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}


/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/Fpu_State* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87_state->env[i] = 0;

   x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
      = x87_state->env[13] = 0xFFFF;
   x87_state->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87_state->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
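         /* Tag value 0 means 'valid'; OR-ing in zero is a no-op,
            kept only to make the two-bits-per-register encoding
            explicit. */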
1693 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1694 &x87_state->reg[10*stno] );
1695 }
1696 }
1697 x87_state->env[FP_ENV_TAG] = toUShort(tagw);
1698 }
1699
1700
1701 /* CALLED FROM GENERATED CODE */
1702 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FXSAVE(VexGuestX86State * gst,HWord addr)1703 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1704 {
1705 /* Somewhat roundabout, but at least it's simple. */
1706 Fpu_State tmp;
1707 UShort* addrS = (UShort*)addr;
1708 UChar* addrC = (UChar*)addr;
1709 U128* xmm = (U128*)(addr + 160);
1710 UInt mxcsr;
1711 UShort fp_tags;
1712 UInt summary_tags;
1713 Int r, stno;
1714 UShort *srcS, *dstS;
1715
1716 do_get_x87( gst, &tmp );
1717 mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1718
1719 /* Now build the proper fxsave image from the x87 image we just
1720 made. */
1721
1722 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1723 addrS[1] = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1724
1725 /* set addrS[2] in an endian-independent way */
1726 summary_tags = 0;
1727 fp_tags = tmp.env[FP_ENV_TAG];
1728 for (r = 0; r < 8; r++) {
1729 if ( ((fp_tags >> (2*r)) & 3) != 3 )
1730 summary_tags |= (1 << r);
1731 }
1732 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
1733 addrC[5] = 0; /* pad */
1734
1735 addrS[3] = 0; /* FOP: fpu opcode (bogus) */
1736 addrS[4] = 0;
1737 addrS[5] = 0; /* FPU IP (bogus) */
1738 addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
1739 could conceivably dump %CS here) */
1740
1741 addrS[7] = 0; /* Intel reserved */
1742
1743 addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */
1744 addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */
1745 addrS[10] = 0; /* segment selector for above operand pointer; %DS
1746 perhaps? */
1747 addrS[11] = 0; /* Intel reserved */
1748
1749 addrS[12] = toUShort(mxcsr); /* MXCSR */
1750 addrS[13] = toUShort(mxcsr >> 16);
1751
1752 addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1753 addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1754
1755 /* Copy in the FP registers, in ST order. */
1756 for (stno = 0; stno < 8; stno++) {
1757 srcS = (UShort*)(&tmp.reg[10*stno]);
1758 dstS = (UShort*)(&addrS[16 + 8*stno]);
1759 dstS[0] = srcS[0];
1760 dstS[1] = srcS[1];
1761 dstS[2] = srcS[2];
1762 dstS[3] = srcS[3];
1763 dstS[4] = srcS[4];
1764 dstS[5] = 0;
1765 dstS[6] = 0;
1766 dstS[7] = 0;
1767 }
1768
1769 /* That's the first 160 bytes of the image done. Now only %xmm0
1770 .. %xmm7 remain to be copied. If the host is big-endian, these
1771 need to be byte-swapped. */
1772 vassert(host_is_little_endian());
1773
1774 # define COPY_U128(_dst,_src) \
1775 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1776 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1777 while (0)
1778
1779 COPY_U128( xmm[0], gst->guest_XMM0 );
1780 COPY_U128( xmm[1], gst->guest_XMM1 );
1781 COPY_U128( xmm[2], gst->guest_XMM2 );
1782 COPY_U128( xmm[3], gst->guest_XMM3 );
1783 COPY_U128( xmm[4], gst->guest_XMM4 );
1784 COPY_U128( xmm[5], gst->guest_XMM5 );
1785 COPY_U128( xmm[6], gst->guest_XMM6 );
1786 COPY_U128( xmm[7], gst->guest_XMM7 );
1787
1788 # undef COPY_U128
1789 }
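
/* For reference, the stores above lay out the first 288 bytes of the
   fxsave image as follows (offsets in bytes):
        0..1    FCW                 24..27   MXCSR
        2..3    FSW                 28..31   MXCSR mask
        4       abridged FTW        32..159  ST(0)..ST(7), 16 bytes
        6..7    FOP                          per register (10 used)
        8..13   FPU IP and CS      160..287  XMM0..XMM7
       16..21   FPU DP and DS
   Bytes 5, 14..15 and 22..23 are padding/reserved, and bytes
   288..511 of the 512-byte image are not written by this helper. */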
1790
1791
1792 /* CALLED FROM GENERATED CODE */
1793 /* DIRTY HELPER (writes guest state, reads guest mem) */
1794 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1795 {
1796 Fpu_State tmp;
1797 VexEmNote warnX87 = EmNote_NONE;
1798 VexEmNote warnXMM = EmNote_NONE;
1799 UShort* addrS = (UShort*)addr;
1800 UChar* addrC = (UChar*)addr;
1801 U128* xmm = (U128*)(addr + 160);
1802 UShort fp_tags;
1803 Int r, stno, i;
1804
1805 /* Restore %xmm0 .. %xmm7. If the host is big-endian, these need
1806 to be byte-swapped. */
1807 vassert(host_is_little_endian());
1808
1809 # define COPY_U128(_dst,_src) \
1810 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
1811 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
1812 while (0)
1813
1814 COPY_U128( gst->guest_XMM0, xmm[0] );
1815 COPY_U128( gst->guest_XMM1, xmm[1] );
1816 COPY_U128( gst->guest_XMM2, xmm[2] );
1817 COPY_U128( gst->guest_XMM3, xmm[3] );
1818 COPY_U128( gst->guest_XMM4, xmm[4] );
1819 COPY_U128( gst->guest_XMM5, xmm[5] );
1820 COPY_U128( gst->guest_XMM6, xmm[6] );
1821 COPY_U128( gst->guest_XMM7, xmm[7] );
1822
1823 # undef COPY_U128
1824
1825 /* Copy the x87 registers out of the image, into a temporary
1826 Fpu_State struct. */
1827
1828 /* LLVM on Darwin turns the following loop into a movaps plus a
1829 handful of scalar stores. This would work fine except for the
1830 fact that VEX doesn't keep the stack correctly (16-) aligned for
1831 the call, so it segfaults. Hence, split the loop into two
1832 pieces (and pray LLVM doesn't merely glue them back together) so
1833 it's composed only of scalar stores and so is alignment
1834 insensitive. Of course this is a kludge of the lamest kind --
1835 VEX should be fixed properly. */
1836 /* Code that seems to trigger the problem:
1837 for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1838 for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1839 __asm__ __volatile__("" ::: "memory");
1840 for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1841
1842 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1843 /* fill in tmp.reg[0..7] */
1844 for (stno = 0; stno < 8; stno++) {
1845 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1846 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1847 dstS[0] = srcS[0];
1848 dstS[1] = srcS[1];
1849 dstS[2] = srcS[2];
1850 dstS[3] = srcS[3];
1851 dstS[4] = srcS[4];
1852 }
1853 /* fill in tmp.env[0..13] */
1854 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1855    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1856
1857 fp_tags = 0;
1858 for (r = 0; r < 8; r++) {
1859 if (addrC[4] & (1<<r))
1860          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1861 else
1862          fp_tags |= (3 << (2*r)); /* EMPTY */
1863 }
1864 tmp.env[FP_ENV_TAG] = fp_tags;
1865
1866 /* Now write 'tmp' into the guest state. */
1867 warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
1868
1869 { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1870 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1871 ULong w64 = x86g_check_ldmxcsr( w32 );
1872
1873 warnXMM = (VexEmNote)(w64 >> 32);
1874
1875 gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
1876 }
1877
1878 /* Prefer an X87 emwarn over an XMM one, if both exist. */
1879 if (warnX87 != EmNote_NONE)
1880 return warnX87;
1881 else
1882 return warnXMM;
1883 }
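
/* Worked example (illustrative): an abridged FTW byte of 0x81 (bits
   0 and 7 set) marks physical registers 0 and 7 as valid, so the
   loop above rebuilds the full tag word with 11 (empty) in fields
   1..6 and 00 (valid) in fields 0 and 7: fp_tags == 0x3FFC. */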
1884
1885
1886 /* CALLED FROM GENERATED CODE */
1887 /* DIRTY HELPER (reads guest state, writes guest mem) */
1888 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1889 {
1890 do_get_x87( gst, (Fpu_State*)addr );
1891 }
1892
1893 /* CALLED FROM GENERATED CODE */
1894 /* DIRTY HELPER (writes guest state, reads guest mem) */
1895 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1896 {
1897 return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
1898 }
1899
1900 /* CALLED FROM GENERATED CODE */
1901 /* DIRTY HELPER (reads guest state, writes guest mem) */
1902 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1903 {
1904 /* Somewhat roundabout, but at least it's simple. */
1905 Int i;
1906 UShort* addrP = (UShort*)addr;
1907 Fpu_State tmp;
1908 do_get_x87( gst, &tmp );
1909 for (i = 0; i < 14; i++)
1910 addrP[i] = tmp.env[i];
1911 }
1912
1913 /* CALLED FROM GENERATED CODE */
1914 /* DIRTY HELPER (writes guest state, reads guest mem) */
1915 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1916 {
1917 return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
1918 }
1919
1920 /* VISIBLE TO LIBVEX CLIENT */
1921 /* Do x87 save from the supplied VexGuestX86State structure and store the
1922 result at the given address which represents a buffer of at least 108
1923 bytes. */
1924 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1925 /*OUT*/UChar* x87_state )
1926 {
1927 do_get_x87 ( vex_state, (Fpu_State*)x87_state );
1928 }
1929
1930 /* VISIBLE TO LIBVEX CLIENT */
1931 /* Do x87 restore from the supplied address and store read values to the given
1932 VexGuestX86State structure. */
1933 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
1934 /*MOD*/VexGuestX86State* vex_state )
1935 {
1936 return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
1937 }
1938
1939 /* VISIBLE TO LIBVEX CLIENT */
1940 /* Return mxcsr from the supplied VexGuestX86State structure. */
1941 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
1942 {
1943 return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
1944 }
1945
1946 /* VISIBLE TO LIBVEX CLIENT */
1947 /* Modify the given VexGuestX86State structure according to the passed mxcsr
1948 value. */
1949 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1950 /*MOD*/VexGuestX86State* vex_state)
1951 {
1952 ULong w64 = x86g_check_ldmxcsr( mxcsr );
1953 vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1954 return (VexEmNote)(w64 >> 32);
1955 }
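
/* Usage sketch for a LibVEX client (illustrative only; the
   rounding-control bit positions 13:14 are those of the hardware
   MXCSR):

      VexGuestX86State gs;
      LibVEX_GuestX86_initialise(&gs);
      UInt mxcsr = LibVEX_GuestX86_get_mxcsr(&gs);
      mxcsr = (mxcsr & ~(3u << 13)) | (1u << 13);    // RC := round down
      VexEmNote ew = LibVEX_GuestX86_put_mxcsr(mxcsr, &gs);
      // ew != EmNote_NONE means some MXCSR feature isn't emulated
      // exactly.
*/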
1956
1957 /*---------------------------------------------------------------*/
1958 /*--- Misc integer helpers, including rotates and CPUID. ---*/
1959 /*---------------------------------------------------------------*/
1960
1961 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1962 /* Calculate both flags and value result for rotate right
1963 through the carry bit. Result in low 32 bits,
1964 new flags (OSZACP) in high 32 bits.
1965 */
1966 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1967 {
1968 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1969
1970 switch (sz) {
1971 case 4:
1972 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1973 of = ((arg >> 31) ^ cf) & 1;
1974 while (tempCOUNT > 0) {
1975 tempcf = arg & 1;
1976 arg = (arg >> 1) | (cf << 31);
1977 cf = tempcf;
1978 tempCOUNT--;
1979 }
1980 break;
1981 case 2:
1982 while (tempCOUNT >= 17) tempCOUNT -= 17;
1983 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1984 of = ((arg >> 15) ^ cf) & 1;
1985 while (tempCOUNT > 0) {
1986 tempcf = arg & 1;
1987 arg = ((arg >> 1) & 0x7FFF) | (cf << 15);
1988 cf = tempcf;
1989 tempCOUNT--;
1990 }
1991 break;
1992 case 1:
1993 while (tempCOUNT >= 9) tempCOUNT -= 9;
1994 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1995 of = ((arg >> 7) ^ cf) & 1;
1996 while (tempCOUNT > 0) {
1997 tempcf = arg & 1;
1998 arg = ((arg >> 1) & 0x7F) | (cf << 7);
1999 cf = tempcf;
2000 tempCOUNT--;
2001 }
2002 break;
2003 default:
2004 vpanic("calculate_RCR: invalid size");
2005 }
2006
2007 cf &= 1;
2008 of &= 1;
2009 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2010 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2011
2012 return (((ULong)eflags_in) << 32) | ((ULong)arg);
2013 }
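
/* Worked example (illustrative): x86g_calculate_RCR(0x01, 1, CF set,
   1) rotates {CF,arg} right one place for a byte operand: the old CF
   (1) enters bit 7 and the old bit 0 (1) becomes the new CF.  The
   low 32 bits of the result hold 0x80, and the new flags have
   CF == 1 and OF == (old bit 7) ^ (old CF) == 1. */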
2014
2015
2016 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2017 /* Calculate both flags and value result for rotate left
2018 through the carry bit. Result in low 32 bits,
2019 new flags (OSZACP) in high 32 bits.
2020 */
2021 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2022 {
2023 UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2024
2025 switch (sz) {
2026 case 4:
2027 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2028 while (tempCOUNT > 0) {
2029 tempcf = (arg >> 31) & 1;
2030 arg = (arg << 1) | (cf & 1);
2031 cf = tempcf;
2032 tempCOUNT--;
2033 }
2034 of = ((arg >> 31) ^ cf) & 1;
2035 break;
2036 case 2:
2037 while (tempCOUNT >= 17) tempCOUNT -= 17;
2038 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2039 while (tempCOUNT > 0) {
2040 tempcf = (arg >> 15) & 1;
2041 arg = 0xFFFF & ((arg << 1) | (cf & 1));
2042 cf = tempcf;
2043 tempCOUNT--;
2044 }
2045 of = ((arg >> 15) ^ cf) & 1;
2046 break;
2047 case 1:
2048 while (tempCOUNT >= 9) tempCOUNT -= 9;
2049 cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2050 while (tempCOUNT > 0) {
2051 tempcf = (arg >> 7) & 1;
2052 arg = 0xFF & ((arg << 1) | (cf & 1));
2053 cf = tempcf;
2054 tempCOUNT--;
2055 }
2056 of = ((arg >> 7) ^ cf) & 1;
2057 break;
2058 default:
2059 vpanic("calculate_RCL: invalid size");
2060 }
2061
2062 cf &= 1;
2063 of &= 1;
2064 eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2065 eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2066
2067 return (((ULong)eflags_in) << 32) | ((ULong)arg);
2068 }
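
/* Worked example (illustrative): x86g_calculate_RCL(0x80, 1, CF
   clear, 1) shifts the old bit 7 (1) into CF and the old CF (0) into
   bit 0, so the low 32 bits hold 0x00 and the new flags have
   CF == 1 and OF == (new bit 7) ^ (new CF) == 1. */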
2069
2070
2071 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2072 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2073 AX value in low half of arg, OSZACP in upper half.
2074 See guest-x86/toIR.c usage point for details.
2075 */
2076 static UInt calc_parity_8bit ( UInt w32 ) {
2077 UInt i;
2078 UInt p = 1;
2079 for (i = 0; i < 8; i++)
2080 p ^= (1 & (w32 >> i));
2081 return p;
2082 }
2083 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2084 {
2085 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2086 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2087 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2088 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2089 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2090 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2091 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2092 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2093 UInt result = 0;
2094
2095 switch (opcode) {
2096 case 0x27: { /* DAA */
2097 UInt old_AL = r_AL;
2098 UInt old_C = r_C;
2099 r_C = 0;
2100 if ((r_AL & 0xF) > 9 || r_A == 1) {
2101 r_AL = r_AL + 6;
2102 r_C = old_C;
2103 if (r_AL >= 0x100) r_C = 1;
2104 r_A = 1;
2105 } else {
2106 r_A = 0;
2107 }
2108 if (old_AL > 0x99 || old_C == 1) {
2109 r_AL = r_AL + 0x60;
2110 r_C = 1;
2111 } else {
2112 r_C = 0;
2113 }
2114 /* O is undefined. S Z and P are set according to the
2115 result. */
2116 r_AL &= 0xFF;
2117 r_O = 0; /* let's say */
2118 r_S = (r_AL & 0x80) ? 1 : 0;
2119 r_Z = (r_AL == 0) ? 1 : 0;
2120 r_P = calc_parity_8bit( r_AL );
2121 break;
2122 }
2123 case 0x2F: { /* DAS */
2124 UInt old_AL = r_AL;
2125 UInt old_C = r_C;
2126 r_C = 0;
2127 if ((r_AL & 0xF) > 9 || r_A == 1) {
2128 Bool borrow = r_AL < 6;
2129 r_AL = r_AL - 6;
2130 r_C = old_C;
2131 if (borrow) r_C = 1;
2132 r_A = 1;
2133 } else {
2134 r_A = 0;
2135 }
2136 if (old_AL > 0x99 || old_C == 1) {
2137 r_AL = r_AL - 0x60;
2138 r_C = 1;
2139 } else {
2140 /* Intel docs are wrong: r_C = 0; */
2141 }
2142 /* O is undefined. S Z and P are set according to the
2143 result. */
2144 r_AL &= 0xFF;
2145 r_O = 0; /* let's say */
2146 r_S = (r_AL & 0x80) ? 1 : 0;
2147 r_Z = (r_AL == 0) ? 1 : 0;
2148 r_P = calc_parity_8bit( r_AL );
2149 break;
2150 }
2151 case 0x37: { /* AAA */
2152 Bool nudge = r_AL > 0xF9;
2153 if ((r_AL & 0xF) > 9 || r_A == 1) {
2154 r_AL = r_AL + 6;
2155 r_AH = r_AH + 1 + (nudge ? 1 : 0);
2156 r_A = 1;
2157 r_C = 1;
2158 r_AL = r_AL & 0xF;
2159 } else {
2160 r_A = 0;
2161 r_C = 0;
2162 r_AL = r_AL & 0xF;
2163 }
2164 /* O S Z and P are undefined. */
2165 r_O = r_S = r_Z = r_P = 0; /* let's say */
2166 break;
2167 }
2168 case 0x3F: { /* AAS */
2169 Bool nudge = r_AL < 0x06;
2170 if ((r_AL & 0xF) > 9 || r_A == 1) {
2171 r_AL = r_AL - 6;
2172 r_AH = r_AH - 1 - (nudge ? 1 : 0);
2173 r_A = 1;
2174 r_C = 1;
2175 r_AL = r_AL & 0xF;
2176 } else {
2177 r_A = 0;
2178 r_C = 0;
2179 r_AL = r_AL & 0xF;
2180 }
2181 /* O S Z and P are undefined. */
2182 r_O = r_S = r_Z = r_P = 0; /* let's say */
2183 break;
2184 }
2185 default:
2186 vassert(0);
2187 }
2188 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2189 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2190 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2191 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2192 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2193 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2194 | ( (r_AH & 0xFF) << 8 )
2195 | ( (r_AL & 0xFF) << 0 );
2196 return result;
2197 }
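
/* Worked example (illustrative): DAA (opcode 0x27) with AL == 0x9A
   and all flags clear: the low nibble 0xA exceeds 9, so 6 is added
   (AL -> 0xA0, AF == 1); the original AL exceeds 0x99, so 0x60 is
   added as well (AL -> 0x00, CF == 1).  The zero result also sets
   ZF and PF. */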
2198
2199 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2200 {
2201 UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2202 UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2203 UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2204 UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2205 UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2206 UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2207 UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2208 UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2209 UInt result = 0;
2210
2211 switch (opcode) {
2212 case 0xD4: { /* AAM */
2213 r_AH = r_AL / 10;
2214 r_AL = r_AL % 10;
2215 break;
2216 }
2217 case 0xD5: { /* AAD */
2218 r_AL = ((r_AH * 10) + r_AL) & 0xff;
2219 r_AH = 0;
2220 break;
2221 }
2222 default:
2223 vassert(0);
2224 }
2225
2226 r_O = 0; /* let's say (undefined) */
2227 r_C = 0; /* let's say (undefined) */
2228 r_A = 0; /* let's say (undefined) */
2229 r_S = (r_AL & 0x80) ? 1 : 0;
2230 r_Z = (r_AL == 0) ? 1 : 0;
2231 r_P = calc_parity_8bit( r_AL );
2232
2233 result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2234 | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2235 | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2236 | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2237 | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2238 | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2239 | ( (r_AH & 0xFF) << 8 )
2240 | ( (r_AL & 0xFF) << 0 );
2241 return result;
2242 }
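
/* Worked example (illustrative): AAM (0xD4) with AL == 47 (0x2F)
   yields AH == 4 and AL == 7, since 47 == 4*10 + 7.  AAD (0xD5) then
   reverses this, recombining AH*10 + AL into AL == 0x2F, AH == 0. */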
2243
2244
2245 /* CALLED FROM GENERATED CODE */
2246 /* DIRTY HELPER (non-referentially-transparent) */
2247 /* Horrible hack. On non-x86 platforms, return 1. */
2248 ULong x86g_dirtyhelper_RDTSC ( void )
2249 {
2250 # if defined(__i386__)
2251 ULong res;
2252 __asm__ __volatile__("rdtsc" : "=A" (res));
2253 return res;
2254 # else
2255 return 1ULL;
2256 # endif
2257 }
2258
2259
2260 /* CALLED FROM GENERATED CODE */
2261 /* DIRTY HELPER (modifies guest state) */
2262 /* Claim to be a P55C (Intel Pentium/MMX) */
2263 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2264 {
2265 switch (st->guest_EAX) {
2266 case 0:
2267 st->guest_EAX = 0x1;
2268 st->guest_EBX = 0x756e6547;
2269 st->guest_ECX = 0x6c65746e;
2270 st->guest_EDX = 0x49656e69;
2271 break;
2272 default:
2273 st->guest_EAX = 0x543;
2274 st->guest_EBX = 0x0;
2275 st->guest_ECX = 0x0;
2276 st->guest_EDX = 0x8001bf;
2277 break;
2278 }
2279 }
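
/* The leaf-0 words above spell out the vendor string "GenuineIntel"
   when read as little-endian bytes in EBX, EDX, ECX order:
      EBX == 0x756e6547 -> "Genu"
      EDX == 0x49656e69 -> "ineI"
      ECX == 0x6c65746e -> "ntel"                                   */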
2280
2281 /* CALLED FROM GENERATED CODE */
2282 /* DIRTY HELPER (modifies guest state) */
2283 /* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2284 /* But without 3DNow support (weird, but we really don't support it). */
2285 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2286 {
2287 switch (st->guest_EAX) {
2288 /* vendor ID */
2289 case 0:
2290 st->guest_EAX = 0x1;
2291 st->guest_EBX = 0x68747541;
2292 st->guest_ECX = 0x444d4163;
2293 st->guest_EDX = 0x69746e65;
2294 break;
2295 /* feature bits */
2296 case 1:
2297 st->guest_EAX = 0x621;
2298 st->guest_EBX = 0x0;
2299 st->guest_ECX = 0x0;
2300 st->guest_EDX = 0x183f9ff;
2301 break;
2302 /* Highest Extended Function Supported (0x80000004 brand string) */
2303 case 0x80000000:
2304 st->guest_EAX = 0x80000004;
2305 st->guest_EBX = 0x68747541;
2306 st->guest_ECX = 0x444d4163;
2307 st->guest_EDX = 0x69746e65;
2308 break;
2309 /* Extended Processor Info and Feature Bits */
2310 case 0x80000001:
2311 st->guest_EAX = 0x721;
2312 st->guest_EBX = 0x0;
2313 st->guest_ECX = 0x0;
2314 st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2315 break;
2316 /* Processor Brand String "AMD Athlon(tm) Processor" */
2317 case 0x80000002:
2318 st->guest_EAX = 0x20444d41;
2319 st->guest_EBX = 0x6c687441;
2320 st->guest_ECX = 0x74286e6f;
2321 st->guest_EDX = 0x5020296d;
2322 break;
2323 case 0x80000003:
2324 st->guest_EAX = 0x65636f72;
2325 st->guest_EBX = 0x726f7373;
2326 st->guest_ECX = 0x0;
2327 st->guest_EDX = 0x0;
2328 break;
2329 default:
2330 st->guest_EAX = 0x0;
2331 st->guest_EBX = 0x0;
2332 st->guest_ECX = 0x0;
2333 st->guest_EDX = 0x0;
2334 break;
2335 }
2336 }
2337
2338 /* CALLED FROM GENERATED CODE */
2339 /* DIRTY HELPER (modifies guest state) */
2340 /* Claim to be the following SSE1-capable CPU:
2341 vendor_id : GenuineIntel
2342 cpu family : 6
2343 model : 11
2344 model name : Intel(R) Pentium(R) III CPU family 1133MHz
2345 stepping : 1
2346 cpu MHz : 1131.013
2347 cache size : 512 KB
2348 */
2349 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2350 {
2351 switch (st->guest_EAX) {
2352 case 0:
2353 st->guest_EAX = 0x00000002;
2354 st->guest_EBX = 0x756e6547;
2355 st->guest_ECX = 0x6c65746e;
2356 st->guest_EDX = 0x49656e69;
2357 break;
2358 case 1:
2359 st->guest_EAX = 0x000006b1;
2360 st->guest_EBX = 0x00000004;
2361 st->guest_ECX = 0x00000000;
2362 st->guest_EDX = 0x0383fbff;
2363 break;
2364 default:
2365 st->guest_EAX = 0x03020101;
2366 st->guest_EBX = 0x00000000;
2367 st->guest_ECX = 0x00000000;
2368 st->guest_EDX = 0x0c040883;
2369 break;
2370 }
2371 }
2372
2373 /* Claim to be the following SSE2-capable CPU:
2374 vendor_id : GenuineIntel
2375 cpu family : 15
2376 model : 2
2377 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz
2378 stepping : 9
2379 microcode : 0x17
2380 cpu MHz : 2992.577
2381 cache size : 512 KB
2382 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2383 pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2384 pebs bts cid xtpr
2385 clflush size : 64
2386 cache_alignment : 128
2387 address sizes : 36 bits physical, 32 bits virtual
2388 */
2389 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2390 {
2391 switch (st->guest_EAX) {
2392 case 0:
2393 st->guest_EAX = 0x00000002;
2394 st->guest_EBX = 0x756e6547;
2395 st->guest_ECX = 0x6c65746e;
2396 st->guest_EDX = 0x49656e69;
2397 break;
2398 case 1:
2399 st->guest_EAX = 0x00000f29;
2400 st->guest_EBX = 0x01020809;
2401 st->guest_ECX = 0x00004400;
2402 st->guest_EDX = 0xbfebfbff;
2403 break;
2404 default:
2405 st->guest_EAX = 0x03020101;
2406 st->guest_EBX = 0x00000000;
2407 st->guest_ECX = 0x00000000;
2408 st->guest_EDX = 0x0c040883;
2409 break;
2410 }
2411 }
2412
2413 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2414 vendor_id : GenuineIntel
2415 cpu family : 6
2416 model : 15
2417 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2418 stepping : 6
2419 cpu MHz : 2394.000
2420 cache size : 4096 KB
2421 physical id : 0
2422 siblings : 2
2423 core id : 0
2424 cpu cores : 2
2425 fpu : yes
2426 fpu_exception : yes
2427 cpuid level : 10
2428 wp : yes
2429 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2430 mtrr pge mca cmov pat pse36 clflush dts acpi
2431 mmx fxsr sse sse2 ss ht tm syscall nx lm
2432 constant_tsc pni monitor ds_cpl vmx est tm2
2433 cx16 xtpr lahf_lm
2434 bogomips : 4798.78
2435 clflush size : 64
2436 cache_alignment : 64
2437 address sizes : 36 bits physical, 48 bits virtual
2438 power management:
2439 */
2440 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
2441 {
2442 # define SET_ABCD(_a,_b,_c,_d) \
2443 do { st->guest_EAX = (UInt)(_a); \
2444 st->guest_EBX = (UInt)(_b); \
2445 st->guest_ECX = (UInt)(_c); \
2446 st->guest_EDX = (UInt)(_d); \
2447 } while (0)
2448
2449 switch (st->guest_EAX) {
2450 case 0x00000000:
2451 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2452 break;
2453 case 0x00000001:
2454 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2455 break;
2456 case 0x00000002:
2457 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2458 break;
2459 case 0x00000003:
2460 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2461 break;
2462 case 0x00000004: {
2463 switch (st->guest_ECX) {
2464 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2465 0x0000003f, 0x00000001); break;
2466 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2467 0x0000003f, 0x00000001); break;
2468 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2469 0x00000fff, 0x00000001); break;
2470 default: SET_ABCD(0x00000000, 0x00000000,
2471 0x00000000, 0x00000000); break;
2472 }
2473 break;
2474 }
2475 case 0x00000005:
2476 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2477 break;
2478 case 0x00000006:
2479 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2480 break;
2481 case 0x00000007:
2482 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2483 break;
2484 case 0x00000008:
2485 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2486 break;
2487 case 0x00000009:
2488 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2489 break;
2490 case 0x0000000a:
2491 unhandled_eax_value:
2492 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2493 break;
2494 case 0x80000000:
2495 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2496 break;
2497 case 0x80000001:
2498 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2499 break;
2500 case 0x80000002:
2501 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2502 break;
2503 case 0x80000003:
2504 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2505 break;
2506 case 0x80000004:
2507 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2508 break;
2509 case 0x80000005:
2510 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2511 break;
2512 case 0x80000006:
2513 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2514 break;
2515 case 0x80000007:
2516 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2517 break;
2518 case 0x80000008:
2519 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2520 break;
2521 default:
2522 goto unhandled_eax_value;
2523 }
2524 # undef SET_ABCD
2525 }
2526
2527
2528 /* CALLED FROM GENERATED CODE */
2529 /* DIRTY HELPER (non-referentially-transparent) */
2530 /* Horrible hack. On non-x86 platforms, return 0. */
2531 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
2532 {
2533 # if defined(__i386__)
2534 UInt r = 0;
2535 portno &= 0xFFFF;
2536 switch (sz) {
2537 case 4:
2538 __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
2539 : "=a" (r) : "Nd" (portno));
2540 break;
2541 case 2:
2542 __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
2543 : "=a" (r) : "Nd" (portno));
2544 break;
2545 case 1:
2546 __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
2547 : "=a" (r) : "Nd" (portno));
2548 break;
2549 default:
2550 break;
2551 }
2552 return r;
2553 # else
2554 return 0;
2555 # endif
2556 }
2557
2558
2559 /* CALLED FROM GENERATED CODE */
2560 /* DIRTY HELPER (non-referentially-transparent) */
2561 /* Horrible hack. On non-x86 platforms, do nothing. */
2562 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2563 {
2564 # if defined(__i386__)
2565 portno &= 0xFFFF;
2566 switch (sz) {
2567 case 4:
2568 __asm__ __volatile__("outl %0, %w1"
2569 : : "a" (data), "Nd" (portno));
2570 break;
2571 case 2:
2572 __asm__ __volatile__("outw %w0, %w1"
2573 : : "a" (data), "Nd" (portno));
2574 break;
2575 case 1:
2576 __asm__ __volatile__("outb %b0, %w1"
2577 : : "a" (data), "Nd" (portno));
2578 break;
2579 default:
2580 break;
2581 }
2582 # else
2583 /* do nothing */
2584 # endif
2585 }
2586
2587 /* CALLED FROM GENERATED CODE */
2588 /* DIRTY HELPER (non-referentially-transparent) */
2589 /* Horrible hack. On non-x86 platforms, write a zeroed descriptor. */
2590 /* op = 0: call the native SGDT instruction.
2591 op = 1: call the native SIDT instruction.
2592 */
2593 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
2594 # if defined(__i386__)
2595 switch (op) {
2596 case 0:
2597 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2598 break;
2599 case 1:
2600 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2601 break;
2602 default:
2603 vpanic("x86g_dirtyhelper_SxDT");
2604 }
2605 # else
2606    /* Fake it: write a zeroed 6-byte descriptor image. */
2607 UChar* p = (UChar*)address;
2608 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2609 # endif
2610 }
2611
2612 /*---------------------------------------------------------------*/
2613 /*--- Helpers for MMX/SSE/SSE2. ---*/
2614 /*---------------------------------------------------------------*/
2615
2616 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2617 return toUChar(xx>yy ? xx-yy : yy-xx);
2618 }
2619
2620 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2621 return (((ULong)w1) << 32) | ((ULong)w0);
2622 }
2623
2624 static inline UShort sel16x4_3 ( ULong w64 ) {
2625 UInt hi32 = toUInt(w64 >> 32);
2626 return toUShort(hi32 >> 16);
2627 }
2628 static inline UShort sel16x4_2 ( ULong w64 ) {
2629 UInt hi32 = toUInt(w64 >> 32);
2630 return toUShort(hi32);
2631 }
2632 static inline UShort sel16x4_1 ( ULong w64 ) {
2633 UInt lo32 = toUInt(w64);
2634 return toUShort(lo32 >> 16);
2635 }
2636 static inline UShort sel16x4_0 ( ULong w64 ) {
2637 UInt lo32 = toUInt(w64);
2638 return toUShort(lo32);
2639 }
2640
2641 static inline UChar sel8x8_7 ( ULong w64 ) {
2642 UInt hi32 = toUInt(w64 >> 32);
2643 return toUChar(hi32 >> 24);
2644 }
2645 static inline UChar sel8x8_6 ( ULong w64 ) {
2646 UInt hi32 = toUInt(w64 >> 32);
2647 return toUChar(hi32 >> 16);
2648 }
2649 static inline UChar sel8x8_5 ( ULong w64 ) {
2650 UInt hi32 = toUInt(w64 >> 32);
2651 return toUChar(hi32 >> 8);
2652 }
2653 static inline UChar sel8x8_4 ( ULong w64 ) {
2654 UInt hi32 = toUInt(w64 >> 32);
2655 return toUChar(hi32 >> 0);
2656 }
2657 static inline UChar sel8x8_3 ( ULong w64 ) {
2658 UInt lo32 = toUInt(w64);
2659 return toUChar(lo32 >> 24);
2660 }
2661 static inline UChar sel8x8_2 ( ULong w64 ) {
2662 UInt lo32 = toUInt(w64);
2663 return toUChar(lo32 >> 16);
2664 }
2665 static inline UChar sel8x8_1 ( ULong w64 ) {
2666 UInt lo32 = toUInt(w64);
2667 return toUChar(lo32 >> 8);
2668 }
2669 static inline UChar sel8x8_0 ( ULong w64 ) {
2670 UInt lo32 = toUInt(w64);
2671 return toUChar(lo32 >> 0);
2672 }
2673
2674 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2675 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2676 {
2677 return
2678 mk32x2(
2679 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2680 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2681 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2682 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2683 );
2684 }
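
/* Worked example (illustrative): with the 16-bit lanes of xx holding
   {1,2,3,4} (lane 3 down to lane 0) and yy holding {5,6,7,8}, the
   result packs 1*5 + 2*6 == 17 into the high 32 bits and
   3*7 + 4*8 == 53 into the low 32 bits. */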
2685
2686 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2687 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2688 {
2689 UInt t = 0;
2690 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2691 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2692 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2693 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2694 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2695 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2696 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2697 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2698 t &= 0xFFFF;
2699 return (ULong)t;
2700 }
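
/* Worked example (illustrative): for xx == 0x0000000000000A00 and
   yy == 0, the only nonzero lane difference is |0x0A - 0| == 10, so
   the helper returns 10.  The 0xFFFF mask documents the 16-bit width
   of the sum; since 8 * 255 == 2040, it can never actually clip. */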
2701
2702
2703 /*---------------------------------------------------------------*/
2704 /*--- Helpers for dealing with segment overrides. ---*/
2705 /*---------------------------------------------------------------*/
2706
2707 static inline
2708 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2709 {
2710 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2711 UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2712 UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2713 return (hi << 24) | (mid << 16) | lo;
2714 }
2715
2716 static inline
2717 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2718 {
2719 UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2720 UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2721 UInt limit = (hi << 16) | lo;
2722 if (ent->LdtEnt.Bits.Granularity)
2723 limit = (limit << 12) | 0xFFF;
2724 return limit;
2725 }
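
/* Worked example (illustrative): a descriptor with BaseHi == 0x00,
   BaseMid == 0x10, BaseLow == 0x0000, LimitHi == 0xF, LimitLow ==
   0xFFFF and Granularity == 1 decodes to base 0x00100000 (1MB) and
   limit (0xFFFFF << 12) | 0xFFF == 0xFFFFFFFF, i.e. a 4GB
   page-granular segment. */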
2726
2727 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2728 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2729 UInt seg_selector, UInt virtual_addr )
2730 {
2731 UInt tiBit, base, limit;
2732 VexGuestX86SegDescr* the_descrs;
2733
2734 Bool verboze = False;
2735
2736 /* If this isn't true, we're in Big Trouble. */
2737 vassert(8 == sizeof(VexGuestX86SegDescr));
2738
2739 if (verboze)
2740       vex_printf("x86g_use_seg_selector: "
2741 "seg_selector = 0x%x, vaddr = 0x%x\n",
2742 seg_selector, virtual_addr);
2743
2744 /* Check for wildly invalid selector. */
2745 if (seg_selector & ~0xFFFF)
2746 goto bad;
2747
2748 seg_selector &= 0x0000FFFF;
2749
2750 /* Sanity check the segment selector. Ensure that RPL=11b (least
2751 privilege). This forms the bottom 2 bits of the selector. */
2752 if ((seg_selector & 3) != 3)
2753 goto bad;
2754
2755 /* Extract the TI bit (0 means GDT, 1 means LDT) */
2756 tiBit = (seg_selector >> 2) & 1;
2757
2758 /* Convert the segment selector onto a table index */
2759 seg_selector >>= 3;
2760 vassert(seg_selector >= 0 && seg_selector < 8192);
2761
2762 if (tiBit == 0) {
2763
2764 /* GDT access. */
2765 /* Do we actually have a GDT to look at? */
2766 if (gdt == 0)
2767 goto bad;
2768
2769 /* Check for access to non-existent entry. */
2770 if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2771 goto bad;
2772
2773 the_descrs = (VexGuestX86SegDescr*)gdt;
2774 base = get_segdescr_base (&the_descrs[seg_selector]);
2775 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2776
2777 } else {
2778
2779 /* All the same stuff, except for the LDT. */
2780 if (ldt == 0)
2781 goto bad;
2782
2783 if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2784 goto bad;
2785
2786 the_descrs = (VexGuestX86SegDescr*)ldt;
2787 base = get_segdescr_base (&the_descrs[seg_selector]);
2788 limit = get_segdescr_limit(&the_descrs[seg_selector]);
2789
2790 }
2791
2792 /* Do the limit check. Note, this check is just slightly too
2793 slack. Really it should be "if (virtual_addr + size - 1 >=
2794 limit)," but we don't have the size info to hand. Getting it
2795 could be significantly complex. */
2796 if (virtual_addr >= limit)
2797 goto bad;
2798
2799 if (verboze)
2800       vex_printf("x86g_use_seg_selector: "
2801 "base = 0x%x, addr = 0x%x\n",
2802 base, base + virtual_addr);
2803
2804 /* High 32 bits are zero, indicating success. */
2805 return (ULong)( ((UInt)virtual_addr) + base );
2806
2807 bad:
2808 return 1ULL << 32;
2809 }
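
/* Worked example (illustrative): selector 0x000F decomposes as
   RPL == 3 (bits 1:0, passing the privilege check), TI == 1 (bit 2,
   so the LDT is consulted) and index == 1 (bits 15:3).  The helper
   then returns the base of LDT entry 1 plus virtual_addr in the low
   32 bits, with zero in the high 32 bits signalling success,
   provided virtual_addr is below the decoded limit. */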
2810
2811
2812 /*---------------------------------------------------------------*/
2813 /*--- Helpers for dealing with, and describing, ---*/
2814 /*--- guest state as a whole. ---*/
2815 /*---------------------------------------------------------------*/
2816
2817 /* Initialise the entire x86 guest state. */
2818 /* VISIBLE TO LIBVEX CLIENT */
2819 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2820 {
2821 vex_state->host_EvC_FAILADDR = 0;
2822 vex_state->host_EvC_COUNTER = 0;
2823
2824 vex_state->guest_EAX = 0;
2825 vex_state->guest_ECX = 0;
2826 vex_state->guest_EDX = 0;
2827 vex_state->guest_EBX = 0;
2828 vex_state->guest_ESP = 0;
2829 vex_state->guest_EBP = 0;
2830 vex_state->guest_ESI = 0;
2831 vex_state->guest_EDI = 0;
2832
2833 vex_state->guest_CC_OP = X86G_CC_OP_COPY;
2834 vex_state->guest_CC_DEP1 = 0;
2835 vex_state->guest_CC_DEP2 = 0;
2836 vex_state->guest_CC_NDEP = 0;
2837 vex_state->guest_DFLAG = 1; /* forwards */
2838 vex_state->guest_IDFLAG = 0;
2839 vex_state->guest_ACFLAG = 0;
2840
2841 vex_state->guest_EIP = 0;
2842
2843 /* Initialise the simulated FPU */
2844 x86g_dirtyhelper_FINIT( vex_state );
2845
2846    /* Initialise the SSE state. */
2847 # define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2848
2849 vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2850 SSEZERO(vex_state->guest_XMM0);
2851 SSEZERO(vex_state->guest_XMM1);
2852 SSEZERO(vex_state->guest_XMM2);
2853 SSEZERO(vex_state->guest_XMM3);
2854 SSEZERO(vex_state->guest_XMM4);
2855 SSEZERO(vex_state->guest_XMM5);
2856 SSEZERO(vex_state->guest_XMM6);
2857 SSEZERO(vex_state->guest_XMM7);
2858
2859 # undef SSEZERO
2860
2861 vex_state->guest_CS = 0;
2862 vex_state->guest_DS = 0;
2863 vex_state->guest_ES = 0;
2864 vex_state->guest_FS = 0;
2865 vex_state->guest_GS = 0;
2866 vex_state->guest_SS = 0;
2867 vex_state->guest_LDT = 0;
2868 vex_state->guest_GDT = 0;
2869
2870 vex_state->guest_EMNOTE = EmNote_NONE;
2871
2872 /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2873 vex_state->guest_CMSTART = 0;
2874 vex_state->guest_CMLEN = 0;
2875
2876 vex_state->guest_NRADDR = 0;
2877 vex_state->guest_SC_CLASS = 0;
2878 vex_state->guest_IP_AT_SYSCALL = 0;
2879
2880 vex_state->padding1 = 0;
2881 vex_state->padding2 = 0;
2882 vex_state->padding3 = 0;
2883 }
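
/* Usage sketch for a LibVEX client (illustrative only; the EIP and
   ESP values are made up):

      VexGuestX86State gs;
      LibVEX_GuestX86_initialise(&gs);
      gs.guest_EIP = 0x08048000;   // entry point of the guest code
      gs.guest_ESP = 0xBFFF0000;   // top of a client-provided stack
      // gs can now be handed to VEX as the initial guest state.
*/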
2884
2885
2886 /* Figure out if any part of the guest state contained in minoff
2887 .. maxoff requires precise memory exceptions. If in doubt return
2888 True (but this generates significantly slower code).
2889
2890 By default we enforce precise exns for guest %ESP, %EBP and %EIP
2891 only. These are the minimum needed to extract correct stack
2892 backtraces from x86 code.
2893
2894 Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2895 */
2896 Bool guest_x86_state_requires_precise_mem_exns (
2897 Int minoff, Int maxoff, VexRegisterUpdates pxControl
2898 )
2899 {
2900 Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2901 Int ebp_max = ebp_min + 4 - 1;
2902 Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2903 Int esp_max = esp_min + 4 - 1;
2904 Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2905 Int eip_max = eip_min + 4 - 1;
2906
2907 if (maxoff < esp_min || minoff > esp_max) {
2908 /* no overlap with esp */
2909 if (pxControl == VexRegUpdSpAtMemAccess)
2910 return False; // We only need to check stack pointer.
2911 } else {
2912 return True;
2913 }
2914
2915 if (maxoff < ebp_min || minoff > ebp_max) {
2916 /* no overlap with ebp */
2917 } else {
2918 return True;
2919 }
2920
2921 if (maxoff < eip_min || minoff > eip_max) {
2922 /* no overlap with eip */
2923 } else {
2924 return True;
2925 }
2926
2927 return False;
2928 }
2929
2930
2931 #define ALWAYSDEFD(field) \
2932 { offsetof(VexGuestX86State, field), \
2933 (sizeof ((VexGuestX86State*)0)->field) }
2934
2935 VexGuestLayout
2936 x86guest_layout
2937 = {
2938 /* Total size of the guest state, in bytes. */
2939 .total_sizeB = sizeof(VexGuestX86State),
2940
2941 /* Describe the stack pointer. */
2942 .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2943 .sizeof_SP = 4,
2944
2945 /* Describe the frame pointer. */
2946 .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2947 .sizeof_FP = 4,
2948
2949 /* Describe the instruction pointer. */
2950 .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2951 .sizeof_IP = 4,
2952
2953 /* Describe any sections to be regarded by Memcheck as
2954 'always-defined'. */
2955 .n_alwaysDefd = 24,
2956
2957 /* flags thunk: OP and NDEP are always defd, whereas DEP1
2958 and DEP2 have to be tracked. See detailed comment in
2959 gdefs.h on meaning of thunk fields. */
2960 .alwaysDefd
2961 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
2962 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
2963 /* 2 */ ALWAYSDEFD(guest_DFLAG),
2964 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
2965 /* 4 */ ALWAYSDEFD(guest_ACFLAG),
2966 /* 5 */ ALWAYSDEFD(guest_EIP),
2967 /* 6 */ ALWAYSDEFD(guest_FTOP),
2968 /* 7 */ ALWAYSDEFD(guest_FPTAG),
2969 /* 8 */ ALWAYSDEFD(guest_FPROUND),
2970 /* 9 */ ALWAYSDEFD(guest_FC3210),
2971 /* 10 */ ALWAYSDEFD(guest_CS),
2972 /* 11 */ ALWAYSDEFD(guest_DS),
2973 /* 12 */ ALWAYSDEFD(guest_ES),
2974 /* 13 */ ALWAYSDEFD(guest_FS),
2975 /* 14 */ ALWAYSDEFD(guest_GS),
2976 /* 15 */ ALWAYSDEFD(guest_SS),
2977 /* 16 */ ALWAYSDEFD(guest_LDT),
2978 /* 17 */ ALWAYSDEFD(guest_GDT),
2979 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
2980 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
2981 /* 20 */ ALWAYSDEFD(guest_CMSTART),
2982 /* 21 */ ALWAYSDEFD(guest_CMLEN),
2983 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
2984 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
2985 }
2986 };
2987
2988
2989 /*---------------------------------------------------------------*/
2990 /*--- end guest_x86_helpers.c ---*/
2991 /*---------------------------------------------------------------*/
2992