/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"


/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/


/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
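
/* Entry i is X86G_CC_MASK_P exactly when the byte i has an even
   number of set bits, matching the x86 definition of PF.  A minimal
   sketch of a generator for cross-checking the table (illustrative
   only, not part of the build; __builtin_popcount is a GCC/Clang
   builtin):

      UChar expected[256];
      for (UInt i = 0; i < 256; i++)
         expected[i] = (__builtin_popcount(i) & 1) ? 0 : X86G_CC_MASK_P;
*/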

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}
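
/* A negative count shifts right instead of left: the flag macros
   below use e.g. lshift(res, 8 - DATA_BITS) & 0x80 to move a result's
   sign bit into the SF position (bit 7) regardless of operand width;
   with DATA_BITS == 32 that is a right shift by 24. */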

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}


#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can     */    \
   /* optimise away, and which stop it complaining about  */    \
   /* unused variables.                                   */    \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;

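/* For example, PREAMBLE(8) yields DATA_MASK == 0xFF and SIGN_MASK ==
   0x80, and binds CC_DEP1/CC_DEP2/CC_NDEP to the thunk parameters of
   the enclosing helper body. */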

/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
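
/* Worked example: ACTIONS_ADD(8, UChar) with argL = 0xFF, argR = 0x01
   gives res = 0x100, so ZF is set ((UChar)res == 0), CF is set
   ((UChar)res < (UChar)argL), AF is set ((res ^ argL ^ argR) & 0x10
   == 0x10), and OF stays clear: 0xFF + 1 carries out of 8 bits but,
   as signed -1 + 1 = 0, does not overflow. */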

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

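/* Note for the ADC/SBB thunks below: the second argument arrives in
   CC_DEP2 already XORed with the old carry bit, so 'argR = CC_DEP2 ^
   oldC' first undoes that XOR to recover the original right-hand
   argument before the computation is redone. */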
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
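
/* INC passes CF through unchanged (via CC_NDEP) and overflows only
   when the result lands exactly on the minimum signed value: at 8
   bits, INC 0x7F gives res == 0x80 == SIGN_MASK, setting OF.  DEC
   below is symmetric, overflowing when the result is SIGN_MASK - 1,
   i.e. DEC 0x80 gives 0x7F. */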

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
          == ((UInt)SIGN_MASK - 1)) << 11;                      \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
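
/* The OF computation in ROL/ROR parks both bits of interest at bit
   position 11 (the OF position) and XORs them there: at 8 bits,
   lshift(CC_DEP1, 11-7) puts the result's msb at bit 11 and
   lshift(CC_DEP1, 11) puts its lsb at bit 11, so bit 11 of the XOR
   is msb ^ lsb. */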

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
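
/* For the unsigned widening multiply, CF and OF are set exactly when
   the double-width product does not fit in DATA_BITS: e.g. 8-bit UMUL
   of 0x10 * 0x10 gives rr == 0x100, so hi == 1 and CF = OF = 1. */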

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}


#if PROFILE_EFLAGS

static Bool initted     = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */


/* Calculate all 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt   );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}


/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}


/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

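
/* Example (client side): a minimal sketch that forces the guest's
   carry flag to 1 while leaving the other flags alone, assuming 'st'
   points at an initialised guest state:

      UInt fl = LibVEX_GuestX86_get_eflags(st);
      LibVEX_GuestX86_put_eflags(fl | X86G_CC_MASK_C, st);

   LibVEX_GuestX86_put_eflag_c below achieves the same effect for the
   carry bit without the explicit get/put round trip. */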

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}


/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
                                        && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1)),
                mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare dst <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}
#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
1360 
1361 
1362 /*---------------------------------------------------------------*/
1363 /*--- Supporting functions for x87 FPU activities.            ---*/
1364 /*---------------------------------------------------------------*/
1365 
1366 static inline Bool host_is_little_endian ( void )
1367 {
1368    UInt x = 0x76543210;
1369    UChar* p = (UChar*)(&x);
1370    return toBool(*p == 0x10);
1371 }
1372 
1373 /* 80- and 64-bit floating-point formats:
1374 
1375    80-bit:
1376 
1377     S  0       0-------0      zero
1378     S  0       0X------X      denormals
1379     S  1-7FFE  1X------X      normals (all normals have leading 1)
1380     S  7FFF    10------0      infinity
1381     S  7FFF    10X-----X      snan
1382     S  7FFF    11X-----X      qnan
1383 
1384    S is the sign bit.  For runs X----X, at least one of the Xs must be
1385    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1386    there is an explicitly represented leading 1, and a sign bit,
1387    giving 80 in total.
1388 
1389    64-bit avoids the confusion of an explicitly represented leading 1
1390    and so is simpler:
1391 
1392     S  0      0------0   zero
1393     S  0      X------X   denormals
1394     S  1-7FE  any        normals
1395     S  7FF    0------0   infinity
1396     S  7FF    0X-----X   snan
1397     S  7FF    1X-----X   qnan
1398 
1399    Exponent is 11 bits, fractional part is 52 bits, and there is a
1400    sign bit, giving 64 in total.
1401 */
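
/* A minimal sketch (editor's illustration, not part of VEX and never
   called): the 64-bit classification from the table above, expressed
   as bit tests.  Assumes only the field widths stated above (11-bit
   exponent in bits 62:52, 52-bit fraction in bits 51:0).
   x86g_calculate_FXAM below performs the same analysis for real. */
#if 0
static const HChar* classify_f64 ( ULong bits )
{
   UInt  bexp = (UInt)((bits >> 52) & 0x7FF);  /* 11-bit exponent */
   ULong frac = bits & 0xFFFFFFFFFFFFFULL;     /* 52-bit fraction */
   if (bexp == 0)     return frac == 0 ? "zero"     : "denormal";
   if (bexp == 0x7FF) return frac == 0 ? "infinity" : "nan";
   return "normal";
}
#endif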
1402 
1403 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1404 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1405 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1406 {
1407    Bool   mantissaIsZero;
1408    Int    bexp;
1409    UChar  sign;
1410    UChar* f64;
1411 
1412    vassert(host_is_little_endian());
1413 
1414    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1415 
1416    f64  = (UChar*)(&dbl);
1417    sign = toUChar( (f64[7] >> 7) & 1 );
1418 
1419    /* First off, if the tag indicates the register was empty,
1420       return C3,C2,C1,C0 = 1,0,sign,1 */
1421    if (tag == 0) {
1422       /* vex_printf("Empty\n"); */
1423       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1424                                  | X86G_FC_MASK_C0;
1425    }
1426 
1427    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1428    bexp &= 0x7FF;
1429 
1430    mantissaIsZero
1431       = toBool(
1432            (f64[6] & 0x0F) == 0
1433            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1434         );
1435 
1436    /* If both exponent and mantissa are zero, the value is zero.
1437       Return 1,0,sign,0. */
1438    if (bexp == 0 && mantissaIsZero) {
1439       /* vex_printf("Zero\n"); */
1440       return X86G_FC_MASK_C3 | 0
1441                              | (sign << X86G_FC_SHIFT_C1) | 0;
1442    }
1443 
1444    /* If exponent is zero but mantissa isn't, it's a denormal.
1445       Return 1,1,sign,0. */
1446    if (bexp == 0 && !mantissaIsZero) {
1447       /* vex_printf("Denormal\n"); */
1448       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1449                              | (sign << X86G_FC_SHIFT_C1) | 0;
1450    }
1451 
1452    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1453       Return 0,1,sign,1. */
1454    if (bexp == 0x7FF && mantissaIsZero) {
1455       /* vex_printf("Inf\n"); */
1456       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1457                                  | X86G_FC_MASK_C0;
1458    }
1459 
1460    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1461       Return 0,0,sign,1. */
1462    if (bexp == 0x7FF && !mantissaIsZero) {
1463       /* vex_printf("NaN\n"); */
1464       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1465    }
1466 
1467    /* Uh, ok, we give up.  It must be a normal finite number.
1468       Return 0,1,sign,0.
1469    */
1470    /* vex_printf("normal\n"); */
1471    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1472 }
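
/* A minimal sketch (editor's illustration, not part of VEX): unpacking
   the condition bits produced by x86g_calculate_FXAM.  The "return
   a,b,c,d" comments above list the bits in C3,C2,C1,C0 order.  Assumes
   the X86G_FC_SHIFT_C0/C2/C3 constants defined in guest_x86_defs.h
   alongside the X86G_FC_SHIFT_C1 used above. */
#if 0
static void example_show_fxam ( UInt c3210 )
{
   UInt c3 = (c3210 >> X86G_FC_SHIFT_C3) & 1;
   UInt c2 = (c3210 >> X86G_FC_SHIFT_C2) & 1;
   UInt c1 = (c3210 >> X86G_FC_SHIFT_C1) & 1;
   UInt c0 = (c3210 >> X86G_FC_SHIFT_C0) & 1;
   vex_printf("FXAM: C3=%x C2=%x C1=%x C0=%x\n", c3, c2, c1, c0);
}
#endif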
1473 
1474 
1475 /* CALLED FROM GENERATED CODE */
1476 /* DIRTY HELPER (reads guest memory) */
1477 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1478 {
1479    ULong f64;
1480    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1481    return f64;
1482 }
1483 
1484 /* CALLED FROM GENERATED CODE */
1485 /* DIRTY HELPER (writes guest memory) */
1486 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1487 {
1488    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1489 }
1490 
1491 
1492 /*----------------------------------------------*/
1493 /*--- The exported fns ..                    ---*/
1494 /*----------------------------------------------*/
1495 
1496 /* Layout of the real x87 state. */
1497 /* 13 June 05: Fpu_State and auxiliary constants were moved to
1498    guest_generic_x87.h */
1499 
1500 
1501 /* CLEAN HELPER */
1502 /* fpucw[15:0] contains a x87 native format FPU control word.
1503    Extract from it the required FPROUND value and any resulting
1504    emulation warning, and return (warn << 32) | fpround value.
1505 */
1506 ULong x86g_check_fldcw ( UInt fpucw )
1507 {
1508    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1509    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1510    UInt rmode = (fpucw >> 10) & 3;
1511 
1512    /* Detect any required emulation warnings. */
1513    VexEmNote ew = EmNote_NONE;
1514 
1515    if ((fpucw & 0x3F) != 0x3F) {
1516       /* unmasked exceptions! */
1517       ew = EmWarn_X86_x87exns;
1518    }
1519    else
1520    if (((fpucw >> 8) & 3) != 3) {
1521       /* unsupported precision */
1522       ew = EmWarn_X86_x87precision;
1523    }
1524 
1525    return (((ULong)ew) << 32) | ((ULong)rmode);
1526 }
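
/* A minimal sketch (editor's illustration, not part of VEX): how a
   caller takes apart the (warn << 32) | fpround pairing described
   above -- the same unpacking that do_put_x87 performs further down. */
#if 0
static void example_fldcw_unpack ( UInt fpucw,
                                   /*OUT*/UInt* fpround,
                                   /*OUT*/VexEmNote* warn )
{
   ULong pair = x86g_check_fldcw(fpucw);
   *fpround = (UInt)pair;              /* low 32 bits  */
   *warn    = (VexEmNote)(pair >> 32); /* high 32 bits */
}
#endif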
1527 
1528 /* CLEAN HELPER */
1529 /* Given fpround as an IRRoundingMode value, create a suitable x87
1530    native format FPU control word. */
1531 UInt x86g_create_fpucw ( UInt fpround )
1532 {
1533    fpround &= 3;
1534    return 0x037F | (fpround << 10);
1535 }
1536 
1537 
1538 /* CLEAN HELPER */
1539 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1540    Extract from it the required SSEROUND value and any resulting
1541    emulation warning, and return (warn << 32) | sseround value.
1542 */
1543 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1544 {
1545    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1546    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1547    UInt rmode = (mxcsr >> 13) & 3;
1548 
1549    /* Detect any required emulation warnings. */
1550    VexEmNote ew = EmNote_NONE;
1551 
1552    if ((mxcsr & 0x1F80) != 0x1F80) {
1553       /* unmasked exceptions! */
1554       ew = EmWarn_X86_sseExns;
1555    }
1556    else
1557    if (mxcsr & (1<<15)) {
1558       /* FZ is set */
1559       ew = EmWarn_X86_fz;
1560    }
1561    else
1562    if (mxcsr & (1<<6)) {
1563       /* DAZ is set */
1564       ew = EmWarn_X86_daz;
1565    }
1566 
1567    return (((ULong)ew) << 32) | ((ULong)rmode);
1568 }
1569 
1570 
1571 /* CLEAN HELPER */
1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1573    native format MXCSR value. */
1574 UInt x86g_create_mxcsr ( UInt sseround )
1575 {
1576    sseround &= 3;
1577    return 0x1F80 | (sseround << 13);
1578 }
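
/* A minimal sketch (editor's illustration, not part of VEX):
   x86g_create_mxcsr and x86g_check_ldmxcsr round-trip cleanly --
   0x1F80 masks all exceptions and leaves the FZ and DAZ bits clear,
   so no emulation warning can arise from a value built here. */
#if 0
static void example_mxcsr_roundtrip ( void )
{
   UInt  rmode = (UInt)Irrm_NEAREST;
   ULong pair  = x86g_check_ldmxcsr( x86g_create_mxcsr(rmode) );
   vassert((UInt)pair == rmode);
   vassert((VexEmNote)(pair >> 32) == EmNote_NONE);
}
#endif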
1579 
1580 
1581 /* CALLED FROM GENERATED CODE */
1582 /* DIRTY HELPER (writes guest state) */
1583 /* Initialise the x87 FPU state as per 'finit'. */
1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1585 {
1586    Int i;
1587    gst->guest_FTOP = 0;
1588    for (i = 0; i < 8; i++) {
1589       gst->guest_FPTAG[i] = 0; /* empty */
1590       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1591    }
1592    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1593    gst->guest_FC3210  = 0;
1594 }
1595 
1596 
1597 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1598    appears to differ from the former only in that the 8 FP registers
1599    themselves are not transferred into the guest state. */
1600 static
1601 VexEmNote do_put_x87 ( Bool moveRegs,
1602                        /*IN*/Fpu_State* x87_state,
1603                        /*OUT*/VexGuestX86State* vex_state )
1604 {
1605    Int        stno, preg;
1606    UInt       tag;
1607    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1608    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1609    UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
1610    UInt       tagw    = x87_state->env[FP_ENV_TAG];
1611    UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
1612    UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
1613    VexEmNote  ew;
1614    UInt       fpround;
1615    ULong      pair;
1616 
1617    /* Copy registers and tags */
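   /* x87 tag-word encoding, per the architecture: 00 = valid, 01 =
      zero, 10 = special, 11 = empty.  VEX's FPTAG array keeps only one
      byte per register (0 = empty, 1 = non-empty), so the four-way
      distinction collapses to two below. */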
1618    for (stno = 0; stno < 8; stno++) {
1619       preg = (stno + ftop) & 7;
1620       tag = (tagw >> (2*preg)) & 3;
1621       if (tag == 3) {
1622          /* register is empty */
1623          /* hmm, if it's empty, does it still get written?  Probably
1624             safer to say it does.  If we don't, memcheck could get out
1625             of sync, in that it thinks all FP registers are defined by
1626             this helper, but in reality some have not been updated. */
1627          if (moveRegs)
1628             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1629          vexTags[preg] = 0;
1630       } else {
1631          /* register is non-empty */
1632          if (moveRegs)
1633             convert_f80le_to_f64le( &x87_state->reg[10*stno],
1634                                     (UChar*)&vexRegs[preg] );
1635          vexTags[preg] = 1;
1636       }
1637    }
1638 
1639    /* stack pointer */
1640    vex_state->guest_FTOP = ftop;
1641 
1642    /* status word */
1643    vex_state->guest_FC3210 = c3210;
1644 
1645    /* handle the control word, setting FPROUND and detecting any
1646       emulation warnings. */
1647    pair    = x86g_check_fldcw ( (UInt)fpucw );
1648    fpround = (UInt)pair;
1649    ew      = (VexEmNote)(pair >> 32);
1650 
1651    vex_state->guest_FPROUND = fpround & 3;
1652 
1653    /* emulation warnings --> caller */
1654    return ew;
1655 }
1656 
1657 
1658 /* Create an x87 FPU state from the guest state, as close as
1659    we can approximate it. */
1660 static
1661 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1662                   /*OUT*/Fpu_State* x87_state )
1663 {
1664    Int        i, stno, preg;
1665    UInt       tagw;
1666    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1667    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1668    UInt       ftop    = vex_state->guest_FTOP;
1669    UInt       c3210   = vex_state->guest_FC3210;
1670 
1671    for (i = 0; i < 14; i++)
1672       x87_state->env[i] = 0;
1673 
1674    x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
1675       = x87_state->env[13] = 0xFFFF;
1676    x87_state->env[FP_ENV_STAT]
1677       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1678    x87_state->env[FP_ENV_CTRL]
1679       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1680 
1681    /* Dump the register stack in ST order. */
1682    tagw = 0;
1683    for (stno = 0; stno < 8; stno++) {
1684       preg = (stno + ftop) & 7;
1685       if (vexTags[preg] == 0) {
1686          /* register is empty */
1687          tagw |= (3 << (2*preg));
1688          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1689                                  &x87_state->reg[10*stno] );
1690       } else {
1691          /* register is full. */
1692          tagw |= (0 << (2*preg));
1693          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1694                                  &x87_state->reg[10*stno] );
1695       }
1696    }
1697    x87_state->env[FP_ENV_TAG] = toUShort(tagw);
1698 }
1699 
1700 
1701 /* CALLED FROM GENERATED CODE */
1702 /* DIRTY HELPER (reads guest state, writes guest mem) */
1703 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1704 {
1705    /* Somewhat roundabout, but at least it's simple. */
1706    Fpu_State tmp;
1707    UShort*   addrS = (UShort*)addr;
1708    UChar*    addrC = (UChar*)addr;
1709    U128*     xmm   = (U128*)(addr + 160);
1710    UInt      mxcsr;
1711    UShort    fp_tags;
1712    UInt      summary_tags;
1713    Int       r, stno;
1714    UShort    *srcS, *dstS;
1715 
1716    do_get_x87( gst, &tmp );
1717    mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1718 
1719    /* Now build the proper fxsave image from the x87 image we just
1720       made. */
1721 
1722    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1723    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1724 
1725    /* set addrS[2] in an endian-independent way */
1726    summary_tags = 0;
1727    fp_tags = tmp.env[FP_ENV_TAG];
1728    for (r = 0; r < 8; r++) {
1729       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1730          summary_tags |= (1 << r);
1731    }
1732    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1733    addrC[5]  = 0; /* pad */
1734 
1735    addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
1736    addrS[4]  = 0;
1737    addrS[5]  = 0; /* FPU IP (bogus) */
1738    addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
1739                      could conceivably dump %CS here) */
1740 
1741    addrS[7]  = 0; /* Intel reserved */
1742 
1743    addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
1744    addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
1745    addrS[10] = 0; /* segment selector for above operand pointer; %DS
1746                      perhaps? */
1747    addrS[11] = 0; /* Intel reserved */
1748 
1749    addrS[12] = toUShort(mxcsr);  /* MXCSR */
1750    addrS[13] = toUShort(mxcsr >> 16);
1751 
1752    addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1753    addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1754 
1755    /* Copy in the FP registers, in ST order. */
1756    for (stno = 0; stno < 8; stno++) {
1757       srcS = (UShort*)(&tmp.reg[10*stno]);
1758       dstS = (UShort*)(&addrS[16 + 8*stno]);
1759       dstS[0] = srcS[0];
1760       dstS[1] = srcS[1];
1761       dstS[2] = srcS[2];
1762       dstS[3] = srcS[3];
1763       dstS[4] = srcS[4];
1764       dstS[5] = 0;
1765       dstS[6] = 0;
1766       dstS[7] = 0;
1767    }
1768 
1769    /* That's the first 160 bytes of the image done.  Now only %xmm0
1770       .. %xmm7 remain to be copied.  If the host is big-endian, these
1771       need to be byte-swapped. */
1772    vassert(host_is_little_endian());
1773 
1774 #  define COPY_U128(_dst,_src)                       \
1775       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1776            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1777       while (0)
1778 
1779    COPY_U128( xmm[0], gst->guest_XMM0 );
1780    COPY_U128( xmm[1], gst->guest_XMM1 );
1781    COPY_U128( xmm[2], gst->guest_XMM2 );
1782    COPY_U128( xmm[3], gst->guest_XMM3 );
1783    COPY_U128( xmm[4], gst->guest_XMM4 );
1784    COPY_U128( xmm[5], gst->guest_XMM5 );
1785    COPY_U128( xmm[6], gst->guest_XMM6 );
1786    COPY_U128( xmm[7], gst->guest_XMM7 );
1787 
1788 #  undef COPY_U128
1789 }
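
/* Recap of the fxsave image just built (byte offsets, as written
   above): 0 FCW, 2 FSW, 4 FTW summary byte, 6 FOP, 8 FPU IP, 12 CS,
   16 FPU DP, 20 DS, 24 MXCSR, 28 MXCSR mask, 32..159 ST0..ST7 at
   16 bytes apiece (10 used, 6 zeroed), 160..287 %xmm0..%xmm7. */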
1790 
1791 
1792 /* CALLED FROM GENERATED CODE */
1793 /* DIRTY HELPER (writes guest state, reads guest mem) */
1794 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1795 {
1796    Fpu_State tmp;
1797    VexEmNote warnX87 = EmNote_NONE;
1798    VexEmNote warnXMM = EmNote_NONE;
1799    UShort*   addrS   = (UShort*)addr;
1800    UChar*    addrC   = (UChar*)addr;
1801    U128*     xmm     = (U128*)(addr + 160);
1802    UShort    fp_tags;
1803    Int       r, stno, i;
1804 
1805    /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
1806       to be byte-swapped. */
1807    vassert(host_is_little_endian());
1808 
1809 #  define COPY_U128(_dst,_src)                       \
1810       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1811            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1812       while (0)
1813 
1814    COPY_U128( gst->guest_XMM0, xmm[0] );
1815    COPY_U128( gst->guest_XMM1, xmm[1] );
1816    COPY_U128( gst->guest_XMM2, xmm[2] );
1817    COPY_U128( gst->guest_XMM3, xmm[3] );
1818    COPY_U128( gst->guest_XMM4, xmm[4] );
1819    COPY_U128( gst->guest_XMM5, xmm[5] );
1820    COPY_U128( gst->guest_XMM6, xmm[6] );
1821    COPY_U128( gst->guest_XMM7, xmm[7] );
1822 
1823 #  undef COPY_U128
1824 
1825    /* Copy the x87 registers out of the image, into a temporary
1826       Fpu_State struct. */
1827 
1828    /* LLVM on Darwin turns the following loop into a movaps plus a
1829       handful of scalar stores.  This would work fine except for the
1830       fact that VEX doesn't keep the stack correctly (16-) aligned for
1831       the call, so it segfaults.  Hence, split the loop into two
1832       pieces (and pray LLVM doesn't merely glue them back together) so
1833       it's composed only of scalar stores and so is alignment
1834       insensitive.  Of course this is a kludge of the lamest kind --
1835       VEX should be fixed properly. */
1836    /* Code that seems to trigger the problem:
1837       for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1838    for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1839    __asm__ __volatile__("" ::: "memory");
1840    for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1841 
1842    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1843    /* fill in tmp.reg[0..7] */
1844    for (stno = 0; stno < 8; stno++) {
1845       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1846       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1847       dstS[0] = srcS[0];
1848       dstS[1] = srcS[1];
1849       dstS[2] = srcS[2];
1850       dstS[3] = srcS[3];
1851       dstS[4] = srcS[4];
1852    }
1853    /* fill in tmp.env[0..13] */
1854    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1855    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1856 
1857    fp_tags = 0;
1858    for (r = 0; r < 8; r++) {
1859       if (addrC[4] & (1<<r))
1860          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1861       else
1862          fp_tags |= (3 << (2*r)); /* EMPTY */
1863    }
1864    tmp.env[FP_ENV_TAG] = fp_tags;
1865 
1866    /* Now write 'tmp' into the guest state. */
1867    warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
1868 
1869    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1870                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1871      ULong w64 = x86g_check_ldmxcsr( w32 );
1872 
1873      warnXMM = (VexEmNote)(w64 >> 32);
1874 
1875      gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
1876    }
1877 
1878    /* Prefer an X87 emwarn over an XMM one, if both exist. */
1879    if (warnX87 != EmNote_NONE)
1880       return warnX87;
1881    else
1882       return warnXMM;
1883 }
1884 
1885 
1886 /* CALLED FROM GENERATED CODE */
1887 /* DIRTY HELPER (reads guest state, writes guest mem) */
1888 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1889 {
1890    do_get_x87( gst, (Fpu_State*)addr );
1891 }
1892 
1893 /* CALLED FROM GENERATED CODE */
1894 /* DIRTY HELPER (writes guest state, reads guest mem) */
1895 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1896 {
1897    return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
1898 }
1899 
1900 /* CALLED FROM GENERATED CODE */
1901 /* DIRTY HELPER (reads guest state, writes guest mem) */
1902 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1903 {
1904    /* Somewhat roundabout, but at least it's simple. */
1905    Int       i;
1906    UShort*   addrP = (UShort*)addr;
1907    Fpu_State tmp;
1908    do_get_x87( gst, &tmp );
1909    for (i = 0; i < 14; i++)
1910       addrP[i] = tmp.env[i];
1911 }
1912 
1913 /* CALLED FROM GENERATED CODE */
1914 /* DIRTY HELPER (writes guest state, reads guest mem) */
1915 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1916 {
1917    return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
1918 }
1919 
1920 /* VISIBLE TO LIBVEX CLIENT */
1921 /* Do x87 save from the supplied VexGuestX86State structure and store the
1922    result at the given address which represents a buffer of at least 108
1923    bytes. */
1924 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1925                                /*OUT*/UChar* x87_state )
1926 {
1927    do_get_x87 ( vex_state, (Fpu_State*)x87_state );
1928 }
1929 
1930 /* VISIBLE TO LIBVEX CLIENT */
1931 /* Do x87 restore from the supplied address and store read values to the given
1932    VexGuestX86State structure. */
1933 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
1934                                     /*MOD*/VexGuestX86State* vex_state )
1935 {
1936    return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
1937 }
1938 
1939 /* VISIBLE TO LIBVEX CLIENT */
1940 /* Return mxcsr from the supplied VexGuestX86State structure. */
1941 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
1942 {
1943    return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
1944 }
1945 
1946 /* VISIBLE TO LIBVEX CLIENT */
1947 /* Modify the given VexGuestX86State structure according to the passed mxcsr
1948    value. */
1949 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1950                                       /*MOD*/VexGuestX86State* vex_state)
1951 {
1952    ULong w64 = x86g_check_ldmxcsr( mxcsr );
1953    vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1954    return (VexEmNote)(w64 >> 32);
1955 }
1956 
1957 /*---------------------------------------------------------------*/
1958 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
1959 /*---------------------------------------------------------------*/
1960 
1961 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1962 /* Calculate both flags and value result for rotate right
1963    through the carry bit.  Result in low 32 bits,
1964    new flags (OSZACP) in high 32 bits.
1965 */
1966 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1967 {
1968    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1969 
1970    switch (sz) {
1971       case 4:
1972          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1973          of        = ((arg >> 31) ^ cf) & 1;
1974          while (tempCOUNT > 0) {
1975             tempcf = arg & 1;
1976             arg    = (arg >> 1) | (cf << 31);
1977             cf     = tempcf;
1978             tempCOUNT--;
1979          }
1980          break;
1981       case 2:
1982          while (tempCOUNT >= 17) tempCOUNT -= 17;
1983          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1984          of        = ((arg >> 15) ^ cf) & 1;
1985          while (tempCOUNT > 0) {
1986             tempcf = arg & 1;
1987             arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1988             cf     = tempcf;
1989             tempCOUNT--;
1990          }
1991          break;
1992       case 1:
1993          while (tempCOUNT >= 9) tempCOUNT -= 9;
1994          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1995          of        = ((arg >> 7) ^ cf) & 1;
1996          while (tempCOUNT > 0) {
1997             tempcf = arg & 1;
1998             arg    = ((arg >> 1) & 0x7F) | (cf << 7);
1999             cf     = tempcf;
2000             tempCOUNT--;
2001          }
2002          break;
2003       default:
2004          vpanic("calculate_RCR: invalid size");
2005    }
2006 
2007    cf &= 1;
2008    of &= 1;
2009    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2010    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2011 
2012    return (((ULong)eflags_in) << 32) | ((ULong)arg);
2013 }
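
/* A minimal sketch (editor's illustration, not part of VEX): taking
   apart the packed result of x86g_calculate_RCR (and of
   x86g_calculate_RCL below) -- rotated value in the low 32 bits,
   updated OSZACP flags in the high 32. */
#if 0
static void example_rcr_unpack ( UInt arg, UInt amt, UInt eflags_in,
                                 /*OUT*/UInt* value,
                                 /*OUT*/UInt* eflags_out )
{
   ULong r = x86g_calculate_RCR(arg, amt, eflags_in, 4/*sz*/);
   *value      = (UInt)r;         /* rotated result */
   *eflags_out = (UInt)(r >> 32); /* new flags      */
}
#endif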
2014 
2015 
2016 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2017 /* Calculate both flags and value result for rotate left
2018    through the carry bit.  Result in low 32 bits,
2019    new flags (OSZACP) in high 32 bits.
2020 */
2021 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2022 {
2023    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2024 
2025    switch (sz) {
2026       case 4:
2027          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2028          while (tempCOUNT > 0) {
2029             tempcf = (arg >> 31) & 1;
2030             arg    = (arg << 1) | (cf & 1);
2031             cf     = tempcf;
2032             tempCOUNT--;
2033          }
2034          of = ((arg >> 31) ^ cf) & 1;
2035          break;
2036       case 2:
2037          while (tempCOUNT >= 17) tempCOUNT -= 17;
2038          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2039          while (tempCOUNT > 0) {
2040             tempcf = (arg >> 15) & 1;
2041             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
2042             cf     = tempcf;
2043             tempCOUNT--;
2044          }
2045          of = ((arg >> 15) ^ cf) & 1;
2046          break;
2047       case 1:
2048          while (tempCOUNT >= 9) tempCOUNT -= 9;
2049          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2050          while (tempCOUNT > 0) {
2051             tempcf = (arg >> 7) & 1;
2052             arg    = 0xFF & ((arg << 1) | (cf & 1));
2053             cf     = tempcf;
2054             tempCOUNT--;
2055          }
2056          of = ((arg >> 7) ^ cf) & 1;
2057          break;
2058       default:
2059          vpanic("calculate_RCL: invalid size");
2060    }
2061 
2062    cf &= 1;
2063    of &= 1;
2064    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2065    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2066 
2067    return (((ULong)eflags_in) << 32) | ((ULong)arg);
2068 }
2069 
2070 
2071 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2072 /* Calculate both flags and value result for DAA/DAS/AAA/AAS.
2073    AX value in low half of arg, OSZACP in upper half.
2074    See guest-x86/toIR.c usage point for details.
2075 */
2076 static UInt calc_parity_8bit ( UInt w32 ) {
2077    UInt i;
2078    UInt p = 1;
2079    for (i = 0; i < 8; i++)
2080       p ^= (1 & (w32 >> i));
2081    return p;
2082 }
2083 UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
2084 {
2085    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2086    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2087    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2088    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2089    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2090    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2091    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2092    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2093    UInt result = 0;
2094 
2095    switch (opcode) {
2096       case 0x27: { /* DAA */
2097          UInt old_AL = r_AL;
2098          UInt old_C  = r_C;
2099          r_C = 0;
2100          if ((r_AL & 0xF) > 9 || r_A == 1) {
2101             r_AL = r_AL + 6;
2102             r_C  = old_C;
2103             if (r_AL >= 0x100) r_C = 1;
2104             r_A = 1;
2105          } else {
2106             r_A = 0;
2107          }
2108          if (old_AL > 0x99 || old_C == 1) {
2109             r_AL = r_AL + 0x60;
2110             r_C  = 1;
2111          } else {
2112             r_C = 0;
2113          }
2114          /* O is undefined.  S Z and P are set according to the
2115             result. */
2116          r_AL &= 0xFF;
2117          r_O = 0; /* let's say */
2118          r_S = (r_AL & 0x80) ? 1 : 0;
2119          r_Z = (r_AL == 0) ? 1 : 0;
2120          r_P = calc_parity_8bit( r_AL );
2121          break;
2122       }
2123       case 0x2F: { /* DAS */
2124          UInt old_AL = r_AL;
2125          UInt old_C  = r_C;
2126          r_C = 0;
2127          if ((r_AL & 0xF) > 9 || r_A == 1) {
2128             Bool borrow = r_AL < 6;
2129             r_AL = r_AL - 6;
2130             r_C  = old_C;
2131             if (borrow) r_C = 1;
2132             r_A = 1;
2133          } else {
2134             r_A = 0;
2135          }
2136          if (old_AL > 0x99 || old_C == 1) {
2137             r_AL = r_AL - 0x60;
2138             r_C  = 1;
2139          } else {
2140             /* Intel docs are wrong: r_C = 0; */
2141          }
2142          /* O is undefined.  S Z and P are set according to the
2143             result. */
2144          r_AL &= 0xFF;
2145          r_O = 0; /* let's say */
2146          r_S = (r_AL & 0x80) ? 1 : 0;
2147          r_Z = (r_AL == 0) ? 1 : 0;
2148          r_P = calc_parity_8bit( r_AL );
2149          break;
2150       }
2151       case 0x37: { /* AAA */
2152          Bool nudge = r_AL > 0xF9;
2153          if ((r_AL & 0xF) > 9 || r_A == 1) {
2154             r_AL = r_AL + 6;
2155             r_AH = r_AH + 1 + (nudge ? 1 : 0);
2156             r_A  = 1;
2157             r_C  = 1;
2158             r_AL = r_AL & 0xF;
2159          } else {
2160             r_A  = 0;
2161             r_C  = 0;
2162             r_AL = r_AL & 0xF;
2163          }
2164          /* O S Z and P are undefined. */
2165          r_O = r_S = r_Z = r_P = 0; /* let's say */
2166          break;
2167       }
2168       case 0x3F: { /* AAS */
2169          Bool nudge = r_AL < 0x06;
2170          if ((r_AL & 0xF) > 9 || r_A == 1) {
2171             r_AL = r_AL - 6;
2172             r_AH = r_AH - 1 - (nudge ? 1 : 0);
2173             r_A  = 1;
2174             r_C  = 1;
2175             r_AL = r_AL & 0xF;
2176          } else {
2177             r_A  = 0;
2178             r_C  = 0;
2179             r_AL = r_AL & 0xF;
2180          }
2181          /* O S Z and P are undefined. */
2182          r_O = r_S = r_Z = r_P = 0; /* let's say */
2183          break;
2184       }
2185       default:
2186          vassert(0);
2187    }
2188    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2189             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2190             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2191             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2192             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2193             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2194             | ( (r_AH & 0xFF) << 8 )
2195             | ( (r_AL & 0xFF) << 0 );
2196    return result;
2197 }
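
/* A minimal sketch (editor's illustration, not part of VEX): building
   the packed flags_and_AX argument that the helper above (and
   x86g_calculate_aad_aam below) expects -- AL in bits 7:0, AH in bits
   15:8, and the OSZACP flags, at their usual X86G_CC_SHIFT_* positions,
   shifted up by 16. */
#if 0
static UInt example_pack_flags_and_AX ( UInt ax, UInt eflags )
{
   return (ax & 0xFFFF) | ((eflags & 0xFFFF) << 16);
}
#endif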
2198 
2199 UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
2200 {
2201    UInt r_AL = (flags_and_AX >> 0) & 0xFF;
2202    UInt r_AH = (flags_and_AX >> 8) & 0xFF;
2203    UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
2204    UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
2205    UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
2206    UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
2207    UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
2208    UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
2209    UInt result = 0;
2210 
2211    switch (opcode) {
2212       case 0xD4: { /* AAM */
2213          r_AH = r_AL / 10;
2214          r_AL = r_AL % 10;
2215          break;
2216       }
2217       case 0xD5: { /* AAD */
2218          r_AL = ((r_AH * 10) + r_AL) & 0xff;
2219          r_AH = 0;
2220          break;
2221       }
2222       default:
2223          vassert(0);
2224    }
2225 
2226    r_O = 0; /* let's say (undefined) */
2227    r_C = 0; /* let's say (undefined) */
2228    r_A = 0; /* let's say (undefined) */
2229    r_S = (r_AL & 0x80) ? 1 : 0;
2230    r_Z = (r_AL == 0) ? 1 : 0;
2231    r_P = calc_parity_8bit( r_AL );
2232 
2233    result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
2234             | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
2235             | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
2236             | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
2237             | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
2238             | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
2239             | ( (r_AH & 0xFF) << 8 )
2240             | ( (r_AL & 0xFF) << 0 );
2241    return result;
2242 }
2243 
2244 
2245 /* CALLED FROM GENERATED CODE */
2246 /* DIRTY HELPER (non-referentially-transparent) */
2247 /* Horrible hack.  On non-x86 platforms, return 1. */
2248 ULong x86g_dirtyhelper_RDTSC ( void )
2249 {
2250 #  if defined(__i386__)
2251    ULong res;
2252    __asm__ __volatile__("rdtsc" : "=A" (res));
2253    return res;
2254 #  else
2255    return 1ULL;
2256 #  endif
2257 }
2258 
2259 
2260 /* CALLED FROM GENERATED CODE */
2261 /* DIRTY HELPER (modifies guest state) */
2262 /* Claim to be a P55C (Intel Pentium/MMX) */
2263 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2264 {
2265    switch (st->guest_EAX) {
2266       case 0:
2267          st->guest_EAX = 0x1;
2268          st->guest_EBX = 0x756e6547;
2269          st->guest_ECX = 0x6c65746e;
2270          st->guest_EDX = 0x49656e69;
2271          break;
2272       default:
2273          st->guest_EAX = 0x543;
2274          st->guest_EBX = 0x0;
2275          st->guest_ECX = 0x0;
2276          st->guest_EDX = 0x8001bf;
2277          break;
2278    }
2279 }
2280 
2281 /* CALLED FROM GENERATED CODE */
2282 /* DIRTY HELPER (modifies guest state) */
2283 /* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2284 /* But without 3DNow support (weird, but we really don't support it). */
2285 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2286 {
2287    switch (st->guest_EAX) {
2288       /* vendor ID */
2289       case 0:
2290          st->guest_EAX = 0x1;
2291          st->guest_EBX = 0x68747541;
2292          st->guest_ECX = 0x444d4163;
2293          st->guest_EDX = 0x69746e65;
2294          break;
2295       /* feature bits */
2296       case 1:
2297          st->guest_EAX = 0x621;
2298          st->guest_EBX = 0x0;
2299          st->guest_ECX = 0x0;
2300          st->guest_EDX = 0x183f9ff;
2301          break;
2302       /* Highest Extended Function Supported (0x80000004 brand string) */
2303       case 0x80000000:
2304          st->guest_EAX = 0x80000004;
2305          st->guest_EBX = 0x68747541;
2306          st->guest_ECX = 0x444d4163;
2307          st->guest_EDX = 0x69746e65;
2308          break;
2309       /* Extended Processor Info and Feature Bits */
2310       case 0x80000001:
2311          st->guest_EAX = 0x721;
2312          st->guest_EBX = 0x0;
2313          st->guest_ECX = 0x0;
2314          st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2315          break;
2316       /* Processor Brand String "AMD Athlon(tm) Processor" */
2317       case 0x80000002:
2318          st->guest_EAX = 0x20444d41;
2319          st->guest_EBX = 0x6c687441;
2320          st->guest_ECX = 0x74286e6f;
2321          st->guest_EDX = 0x5020296d;
2322          break;
2323       case 0x80000003:
2324          st->guest_EAX = 0x65636f72;
2325          st->guest_EBX = 0x726f7373;
2326          st->guest_ECX = 0x0;
2327          st->guest_EDX = 0x0;
2328          break;
2329       default:
2330          st->guest_EAX = 0x0;
2331          st->guest_EBX = 0x0;
2332          st->guest_ECX = 0x0;
2333          st->guest_EDX = 0x0;
2334          break;
2335    }
2336 }
2337 
2338 /* CALLED FROM GENERATED CODE */
2339 /* DIRTY HELPER (modifies guest state) */
2340 /* Claim to be the following SSE1-capable CPU:
2341    vendor_id       : GenuineIntel
2342    cpu family      : 6
2343    model           : 11
2344    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2345    stepping        : 1
2346    cpu MHz         : 1131.013
2347    cache size      : 512 KB
2348 */
2349 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2350 {
2351    switch (st->guest_EAX) {
2352       case 0:
2353          st->guest_EAX = 0x00000002;
2354          st->guest_EBX = 0x756e6547;
2355          st->guest_ECX = 0x6c65746e;
2356          st->guest_EDX = 0x49656e69;
2357          break;
2358       case 1:
2359          st->guest_EAX = 0x000006b1;
2360          st->guest_EBX = 0x00000004;
2361          st->guest_ECX = 0x00000000;
2362          st->guest_EDX = 0x0383fbff;
2363          break;
2364       default:
2365          st->guest_EAX = 0x03020101;
2366          st->guest_EBX = 0x00000000;
2367          st->guest_ECX = 0x00000000;
2368          st->guest_EDX = 0x0c040883;
2369          break;
2370    }
2371 }
2372 
2373 /* Claim to be the following SSE2-capable CPU:
2374    vendor_id    : GenuineIntel
2375    cpu family   : 15
2376    model        : 2
2377    model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
2378    stepping     : 9
2379    microcode    : 0x17
2380    cpu MHz      : 2992.577
2381    cache size   : 512 KB
2382    flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2383                   pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2384                    pebs bts cid xtpr
2385    clflush size : 64
2386    cache_alignment : 128
2387    address sizes : 36 bits physical, 32 bits virtual
2388 */
2389 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2390 {
2391    switch (st->guest_EAX) {
2392       case 0:
2393          st->guest_EAX = 0x00000002;
2394          st->guest_EBX = 0x756e6547;
2395          st->guest_ECX = 0x6c65746e;
2396          st->guest_EDX = 0x49656e69;
2397          break;
2398       case 1:
2399          st->guest_EAX = 0x00000f29;
2400          st->guest_EBX = 0x01020809;
2401          st->guest_ECX = 0x00004400;
2402          st->guest_EDX = 0xbfebfbff;
2403          break;
2404       default:
2405          st->guest_EAX = 0x03020101;
2406          st->guest_EBX = 0x00000000;
2407          st->guest_ECX = 0x00000000;
2408          st->guest_EDX = 0x0c040883;
2409          break;
2410    }
2411 }
2412 
2413 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2414    vendor_id       : GenuineIntel
2415    cpu family      : 6
2416    model           : 15
2417    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2418    stepping        : 6
2419    cpu MHz         : 2394.000
2420    cache size      : 4096 KB
2421    physical id     : 0
2422    siblings        : 2
2423    core id         : 0
2424    cpu cores       : 2
2425    fpu             : yes
2426    fpu_exception   : yes
2427    cpuid level     : 10
2428    wp              : yes
2429    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2430                      mtrr pge mca cmov pat pse36 clflush dts acpi
2431                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2432                      constant_tsc pni monitor ds_cpl vmx est tm2
2433                      cx16 xtpr lahf_lm
2434    bogomips        : 4798.78
2435    clflush size    : 64
2436    cache_alignment : 64
2437    address sizes   : 36 bits physical, 48 bits virtual
2438    power management:
2439 */
2440 void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
2441 {
2442 #  define SET_ABCD(_a,_b,_c,_d)               \
2443       do { st->guest_EAX = (UInt)(_a);        \
2444            st->guest_EBX = (UInt)(_b);        \
2445            st->guest_ECX = (UInt)(_c);        \
2446            st->guest_EDX = (UInt)(_d);        \
2447       } while (0)
2448 
2449    switch (st->guest_EAX) {
2450       case 0x00000000:
2451          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2452          break;
2453       case 0x00000001:
2454          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2455          break;
2456       case 0x00000002:
2457          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2458          break;
2459       case 0x00000003:
2460          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2461          break;
2462       case 0x00000004: {
2463          switch (st->guest_ECX) {
2464             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2465                                       0x0000003f, 0x00000001); break;
2466             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2467                                       0x0000003f, 0x00000001); break;
2468             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2469                                       0x00000fff, 0x00000001); break;
2470             default:         SET_ABCD(0x00000000, 0x00000000,
2471                                       0x00000000, 0x00000000); break;
2472          }
2473          break;
2474       }
2475       case 0x00000005:
2476          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2477          break;
2478       case 0x00000006:
2479          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2480          break;
2481       case 0x00000007:
2482          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2483          break;
2484       case 0x00000008:
2485          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2486          break;
2487       case 0x00000009:
2488          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2489          break;
2490       case 0x0000000a:
2491       unhandled_eax_value:
2492          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2493          break;
2494       case 0x80000000:
2495          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2496          break;
2497       case 0x80000001:
2498          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
2499          break;
2500       case 0x80000002:
2501          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2502          break;
2503       case 0x80000003:
2504          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2505          break;
2506       case 0x80000004:
2507          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2508          break;
2509       case 0x80000005:
2510          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2511          break;
2512       case 0x80000006:
2513          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2514          break;
2515       case 0x80000007:
2516          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2517          break;
2518       case 0x80000008:
2519          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2520          break;
2521       default:
2522          goto unhandled_eax_value;
2523    }
2524 #  undef SET_ABCD
2525 }
2526 
2527 
2528 /* CALLED FROM GENERATED CODE */
2529 /* DIRTY HELPER (non-referentially-transparent) */
2530 /* Horrible hack.  On non-x86 platforms, return 0. */
2531 UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
2532 {
2533 #  if defined(__i386__)
2534    UInt r = 0;
2535    portno &= 0xFFFF;
2536    switch (sz) {
2537       case 4:
2538          __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
2539                               : "=a" (r) : "Nd" (portno));
2540          break;
2541       case 2:
2542          __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
2543                               : "=a" (r) : "Nd" (portno));
2544          break;
2545       case 1:
2546          __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
2547                               : "=a" (r) : "Nd" (portno));
2548          break;
2549       default:
2550          break;
2551    }
2552    return r;
2553 #  else
2554    return 0;
2555 #  endif
2556 }
2557 
2558 
2559 /* CALLED FROM GENERATED CODE */
2560 /* DIRTY HELPER (non-referentially-transparent) */
2561 /* Horrible hack.  On non-x86 platforms, do nothing. */
2562 void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
2563 {
2564 #  if defined(__i386__)
2565    portno &= 0xFFFF;
2566    switch (sz) {
2567       case 4:
2568          __asm__ __volatile__("outl %0, %w1"
2569                               : : "a" (data), "Nd" (portno));
2570          break;
2571       case 2:
2572          __asm__ __volatile__("outw %w0, %w1"
2573                               : : "a" (data), "Nd" (portno));
2574          break;
2575       case 1:
2576          __asm__ __volatile__("outb %b0, %w1"
2577                               : : "a" (data), "Nd" (portno));
2578          break;
2579       default:
2580          break;
2581    }
2582 #  else
2583    /* do nothing */
2584 #  endif
2585 }
2586 
2587 /* CALLED FROM GENERATED CODE */
2588 /* DIRTY HELPER (non-referentially-transparent) */
2589 /* Horrible hack.  On non-x86 platforms, do nothing. */
2590 /* op = 0: call the native SGDT instruction.
2591    op = 1: call the native SIDT instruction.
2592 */
2593 void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
2594 #  if defined(__i386__)
2595    switch (op) {
2596       case 0:
2597          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2598          break;
2599       case 1:
2600          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2601          break;
2602       default:
2603          vpanic("x86g_dirtyhelper_SxDT");
2604    }
2605 #  else
2606    /* Can't run the real instruction; zero the 6-byte result instead. */
2607    UChar* p = (UChar*)address;
2608    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2609 #  endif
2610 }
2611 
2612 /*---------------------------------------------------------------*/
2613 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
2614 /*---------------------------------------------------------------*/
2615 
2616 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2617    return toUChar(xx>yy ? xx-yy : yy-xx);
2618 }
2619 
2620 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2621    return (((ULong)w1) << 32) | ((ULong)w0);
2622 }
2623 
2624 static inline UShort sel16x4_3 ( ULong w64 ) {
2625    UInt hi32 = toUInt(w64 >> 32);
2626    return toUShort(hi32 >> 16);
2627 }
2628 static inline UShort sel16x4_2 ( ULong w64 ) {
2629    UInt hi32 = toUInt(w64 >> 32);
2630    return toUShort(hi32);
2631 }
2632 static inline UShort sel16x4_1 ( ULong w64 ) {
2633    UInt lo32 = toUInt(w64);
2634    return toUShort(lo32 >> 16);
2635 }
2636 static inline UShort sel16x4_0 ( ULong w64 ) {
2637    UInt lo32 = toUInt(w64);
2638    return toUShort(lo32);
2639 }
2640 
2641 static inline UChar sel8x8_7 ( ULong w64 ) {
2642    UInt hi32 = toUInt(w64 >> 32);
2643    return toUChar(hi32 >> 24);
2644 }
2645 static inline UChar sel8x8_6 ( ULong w64 ) {
2646    UInt hi32 = toUInt(w64 >> 32);
2647    return toUChar(hi32 >> 16);
2648 }
2649 static inline UChar sel8x8_5 ( ULong w64 ) {
2650    UInt hi32 = toUInt(w64 >> 32);
2651    return toUChar(hi32 >> 8);
2652 }
2653 static inline UChar sel8x8_4 ( ULong w64 ) {
2654    UInt hi32 = toUInt(w64 >> 32);
2655    return toUChar(hi32 >> 0);
2656 }
2657 static inline UChar sel8x8_3 ( ULong w64 ) {
2658    UInt lo32 = toUInt(w64);
2659    return toUChar(lo32 >> 24);
2660 }
2661 static inline UChar sel8x8_2 ( ULong w64 ) {
2662    UInt lo32 = toUInt(w64);
2663    return toUChar(lo32 >> 16);
2664 }
2665 static inline UChar sel8x8_1 ( ULong w64 ) {
2666    UInt lo32 = toUInt(w64);
2667    return toUChar(lo32 >> 8);
2668 }
2669 static inline UChar sel8x8_0 ( ULong w64 ) {
2670    UInt lo32 = toUInt(w64);
2671    return toUChar(lo32 >> 0);
2672 }
2673 
2674 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2675 ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2676 {
2677    return
2678       mk32x2(
2679          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2680             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2681          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2682             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2683       );
2684 }
2685 
2686 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2687 ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2688 {
2689    UInt t = 0;
2690    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2691    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2692    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2693    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2694    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2695    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2696    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2697    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2698    t &= 0xFFFF;
2699    return (ULong)t;
2700 }
2701 
2702 
2703 /*---------------------------------------------------------------*/
2704 /*--- Helpers for dealing with segment overrides.             ---*/
2705 /*---------------------------------------------------------------*/
2706 
2707 static inline
2708 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2709 {
2710    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2711    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2712    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2713    return (hi << 24) | (mid << 16) | lo;
2714 }
2715 
2716 static inline
2717 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2718 {
2719     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2720     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2721     UInt limit = (hi << 16) | lo;
2722     if (ent->LdtEnt.Bits.Granularity)
2723        limit = (limit << 12) | 0xFFF;
2724     return limit;
2725 }
2726 
2727 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2728 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2729                               UInt seg_selector, UInt virtual_addr )
2730 {
2731    UInt tiBit, base, limit;
2732    VexGuestX86SegDescr* the_descrs;
2733 
2734    Bool verboze = False;
2735 
2736    /* If this isn't true, we're in Big Trouble. */
2737    vassert(8 == sizeof(VexGuestX86SegDescr));
2738 
2739    if (verboze)
2740       vex_printf("x86h_use_seg_selector: "
2741                  "seg_selector = 0x%x, vaddr = 0x%x\n",
2742                  seg_selector, virtual_addr);
2743 
2744    /* Check for wildly invalid selector. */
2745    if (seg_selector & ~0xFFFF)
2746       goto bad;
2747 
2748    seg_selector &= 0x0000FFFF;
2749 
2750    /* Sanity check the segment selector.  Ensure that RPL=11b (least
2751       privilege).  This forms the bottom 2 bits of the selector. */
2752    if ((seg_selector & 3) != 3)
2753       goto bad;
2754 
2755    /* Extract the TI bit (0 means GDT, 1 means LDT) */
2756    tiBit = (seg_selector >> 2) & 1;
2757 
2758    /* Convert the segment selector onto a table index */
2759    seg_selector >>= 3;
2760    vassert(seg_selector >= 0 && seg_selector < 8192);
2761 
2762    if (tiBit == 0) {
2763 
2764       /* GDT access. */
2765       /* Do we actually have a GDT to look at? */
2766       if (gdt == 0)
2767          goto bad;
2768 
2769       /* Check for access to non-existent entry. */
2770       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2771          goto bad;
2772 
2773       the_descrs = (VexGuestX86SegDescr*)gdt;
2774       base  = get_segdescr_base (&the_descrs[seg_selector]);
2775       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2776 
2777    } else {
2778 
2779       /* All the same stuff, except for the LDT. */
2780       if (ldt == 0)
2781          goto bad;
2782 
2783       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2784          goto bad;
2785 
2786       the_descrs = (VexGuestX86SegDescr*)ldt;
2787       base  = get_segdescr_base (&the_descrs[seg_selector]);
2788       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2789 
2790    }
2791 
2792    /* Do the limit check.  Note, this check is just slightly too
2793       slack.  Really it should be "if (virtual_addr + size - 1 >=
2794       limit)," but we don't have the size info to hand.  Getting it
2795       could be significantly complex.  */
2796    if (virtual_addr >= limit)
2797       goto bad;
2798 
2799    if (verboze)
2800       vex_printf("x86h_use_seg_selector: "
2801                  "base = 0x%x, addr = 0x%x\n",
2802                  base, base + virtual_addr);
2803 
2804    /* High 32 bits are zero, indicating success. */
2805    return (ULong)( ((UInt)virtual_addr) + base );
2806 
2807  bad:
2808    return 1ULL << 32;
2809 }
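
/* A minimal sketch (editor's illustration, not part of VEX): a caller
   detects failure of x86g_use_seg_selector by inspecting the upper
   half of the result -- zero there means the lower half holds the
   translated linear address. */
#if 0
static Bool example_translate_vaddr ( HWord ldt, HWord gdt,
                                      UInt sel, UInt vaddr,
                                      /*OUT*/UInt* linear )
{
   ULong r = x86g_use_seg_selector(ldt, gdt, sel, vaddr);
   if ((r >> 32) != 0)
      return False;   /* bad selector, missing table, or limit check */
   *linear = (UInt)r;
   return True;
}
#endif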
2810 
2811 
2812 /*---------------------------------------------------------------*/
2813 /*--- Helpers for dealing with, and describing,               ---*/
2814 /*--- guest state as a whole.                                 ---*/
2815 /*---------------------------------------------------------------*/
2816 
2817 /* Initialise the entire x86 guest state. */
2818 /* VISIBLE TO LIBVEX CLIENT */
2819 void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
2820 {
2821    vex_state->host_EvC_FAILADDR = 0;
2822    vex_state->host_EvC_COUNTER = 0;
2823 
2824    vex_state->guest_EAX = 0;
2825    vex_state->guest_ECX = 0;
2826    vex_state->guest_EDX = 0;
2827    vex_state->guest_EBX = 0;
2828    vex_state->guest_ESP = 0;
2829    vex_state->guest_EBP = 0;
2830    vex_state->guest_ESI = 0;
2831    vex_state->guest_EDI = 0;
2832 
2833    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
2834    vex_state->guest_CC_DEP1 = 0;
2835    vex_state->guest_CC_DEP2 = 0;
2836    vex_state->guest_CC_NDEP = 0;
2837    vex_state->guest_DFLAG   = 1; /* forwards */
2838    vex_state->guest_IDFLAG  = 0;
2839    vex_state->guest_ACFLAG  = 0;
2840 
2841    vex_state->guest_EIP = 0;
2842 
2843    /* Initialise the simulated FPU */
2844    x86g_dirtyhelper_FINIT( vex_state );
2845 
2846    /* Initialise the SSE state. */
2847 #  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
2848 
2849    vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
2850    SSEZERO(vex_state->guest_XMM0);
2851    SSEZERO(vex_state->guest_XMM1);
2852    SSEZERO(vex_state->guest_XMM2);
2853    SSEZERO(vex_state->guest_XMM3);
2854    SSEZERO(vex_state->guest_XMM4);
2855    SSEZERO(vex_state->guest_XMM5);
2856    SSEZERO(vex_state->guest_XMM6);
2857    SSEZERO(vex_state->guest_XMM7);
2858 
2859 #  undef SSEZERO
2860 
2861    vex_state->guest_CS  = 0;
2862    vex_state->guest_DS  = 0;
2863    vex_state->guest_ES  = 0;
2864    vex_state->guest_FS  = 0;
2865    vex_state->guest_GS  = 0;
2866    vex_state->guest_SS  = 0;
2867    vex_state->guest_LDT = 0;
2868    vex_state->guest_GDT = 0;
2869 
2870    vex_state->guest_EMNOTE = EmNote_NONE;
2871 
2872    /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
2873    vex_state->guest_CMSTART = 0;
2874    vex_state->guest_CMLEN   = 0;
2875 
2876    vex_state->guest_NRADDR   = 0;
2877    vex_state->guest_SC_CLASS = 0;
2878    vex_state->guest_IP_AT_SYSCALL = 0;
2879 
2880    vex_state->padding1 = 0;
2881    vex_state->padding2 = 0;
2882    vex_state->padding3 = 0;
2883 }
2884 
2885 
2886 /* Figure out if any part of the guest state contained in minoff
2887    .. maxoff requires precise memory exceptions.  If in doubt return
2888    True (but this generates significantly slower code).
2889 
2890    By default we enforce precise exns for guest %ESP, %EBP and %EIP
2891    only.  These are the minimum needed to extract correct stack
2892    backtraces from x86 code.
2893 
2894    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2895 */
2896 Bool guest_x86_state_requires_precise_mem_exns (
2897         Int minoff, Int maxoff, VexRegisterUpdates pxControl
2898      )
2899 {
2900    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2901    Int ebp_max = ebp_min + 4 - 1;
2902    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2903    Int esp_max = esp_min + 4 - 1;
2904    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2905    Int eip_max = eip_min + 4 - 1;
2906 
2907    if (maxoff < esp_min || minoff > esp_max) {
2908       /* no overlap with esp */
2909       if (pxControl == VexRegUpdSpAtMemAccess)
2910          return False; // We only need to check stack pointer.
2911    } else {
2912       return True;
2913    }
2914 
2915    if (maxoff < ebp_min || minoff > ebp_max) {
2916       /* no overlap with ebp */
2917    } else {
2918       return True;
2919    }
2920 
2921    if (maxoff < eip_min || minoff > eip_max) {
2922       /* no overlap with eip */
2923    } else {
2924       return True;
2925    }
2926 
2927    return False;
2928 }
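/* Illustrative sketch (not compiled): each test above is the
   standard closed-interval disjointness check.  Two byte ranges
   [minoff,maxoff] and [lo,hi] overlap exactly when neither ends
   before the other begins; demo_ranges_overlap is hypothetical. */
#if 0
static Bool demo_ranges_overlap ( Int minoff, Int maxoff, Int lo, Int hi )
{
   return !(maxoff < lo || minoff > hi);
}
/* Hence a 4-byte store covering guest_ESP's offsets makes the first
   test fail and the function return True, whereas a store confined
   to, say, guest_EAX misses %ESP, %EBP and %EIP and yields False. */
#endif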


#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

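/* For example, ALWAYSDEFD(guest_CC_OP) expands to the pair

      { offsetof(VexGuestX86State, guest_CC_OP),
        sizeof(((VexGuestX86State*)0)->guest_CC_OP) }

   i.e. the byte offset and byte size of the field within the guest
   state, obtained without a real VexGuestX86State object: the null
   pointer is consumed only by sizeof and is never dereferenced. */
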
VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
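/* Illustrative sketch (not compiled, hypothetical): .n_alwaysDefd
   must not exceed the capacity of the alwaysDefd array, and here it
   should match the 24 entries (indices 0..23) listed above.  A
   run-time sanity check along these lines could enforce that. */
#if 0
static void demo_check_layout ( void )
{
   vassert(x86guest_layout.n_alwaysDefd
           <= sizeof(x86guest_layout.alwaysDefd)
              / sizeof(x86guest_layout.alwaysDefd[0]));
}
#endif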


/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/
