• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                               guest_x86_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2015 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_x86.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_x86_defs.h"
46 #include "guest_generic_x87.h"
47 
48 
49 /* This file contains helper functions for x86 guest code.
50    Calls to these functions are generated by the back end.
51    These calls are of course in the host machine code and
52    this file will be compiled to host machine code, so that
53    all makes sense.
54 
55    Only change the signatures of these helper functions very
56    carefully.  If you change the signature here, you'll have to change
57    the parameters passed to it in the IR calls constructed by
58    guest-x86/toIR.c.
59 
60    The convention used is that all functions called from generated
61    code are named x86g_<something>, and any function whose name lacks
62    that prefix is not called from generated code.  Note that some
63    LibVEX_* functions can however be called by VEX's client, but that
64    is not the same as calling them from VEX-generated code.
65 */
66 
67 
68 /* Set to 1 to get detailed profiling info about use of the flag
69    machinery. */
70 #define PROFILE_EFLAGS 0
71 
72 
73 /*---------------------------------------------------------------*/
74 /*--- %eflags run-time helpers.                               ---*/
75 /*---------------------------------------------------------------*/
76 
77 static const UChar parity_table[256] = {
78     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
79     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
80     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
81     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
82     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
83     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
84     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
85     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
86     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
87     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
88     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
89     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
90     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
91     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
92     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
93     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
94     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
95     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
96     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
97     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
98     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
99     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
100     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
101     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
102     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
103     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
104     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
105     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
106     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
107     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
108     X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
109     0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
110 };
111 
112 /* generalised left-shifter */
lshift(Int x,Int n)113 inline static Int lshift ( Int x, Int n )
114 {
115    if (n >= 0)
116       return (UInt)x << n;
117    else
118       return x >> (-n);
119 }
120 
121 /* identity on ULong */
idULong(ULong x)122 static inline ULong idULong ( ULong x )
123 {
124    return x;
125 }
126 
127 
/* Common prologue for the ACTIONS_* macros.  Must be expanded inside a
   function that has cc_dep1_formal, cc_dep2_formal and cc_ndep_formal
   in scope.  Declares:
      DATA_MASK  all-ones mask for an operand of DATA_BITS (8/16/32) bits
      SIGN_MASK  the top (sign) bit of such an operand
      CC_DEP1, CC_DEP2, CC_NDEP  copies of the three formals
   Not every ACTIONS_* macro reads every one of these, so each
   possibly-unused name is referenced through a (void) cast to keep
   the compiler from warning about it. */
#define PREAMBLE(DATA_BITS)                                     \
   const UInt DATA_MASK                                         \
      = (DATA_BITS) == 8 ? 0xFF                                 \
                         : ((DATA_BITS) == 16 ? 0xFFFF          \
                                              : 0xFFFFFFFF);    \
   const UInt SIGN_MASK = 1u << ((DATA_BITS) - 1);              \
   const UInt CC_DEP1 = cc_dep1_formal;                         \
   const UInt CC_DEP2 = cc_dep2_formal;                         \
   const UInt CC_NDEP = cc_ndep_formal;                         \
   (void)SIGN_MASK;                                             \
   (void)DATA_MASK;                                             \
   (void)CC_DEP2;                                               \
   (void)CC_NDEP;
144 
145 
146 /*-------------------------------------------------------------*/
147 
/* ADD: CC_DEP1 and CC_DEP2 are the two operands.  Expands to a block
   that returns the six flags computed for a DATA_BITS-wide add. */
#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt lhs = CC_DEP1;                                  \
     const UInt rhs = CC_DEP2;                                  \
     const UInt out = lhs + rhs;                                \
     /* carry: the truncated sum is below the left operand */   \
     const UInt c_bit = (DATA_UTYPE)out < (DATA_UTYPE)lhs;      \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ lhs ^ rhs) & 0x10;               \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     /* overflow: operands agree in sign, result differs */     \
     const UInt o_bit                                           \
        = lshift((lhs ^ rhs ^ -1) & (lhs ^ out),                \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
166 
167 /*-------------------------------------------------------------*/
168 
/* SUB: CC_DEP1 is the left operand, CC_DEP2 the right.  Expands to a
   block that returns the six flags for a DATA_BITS-wide subtract. */
#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt lhs = CC_DEP1;                                  \
     const UInt rhs = CC_DEP2;                                  \
     const UInt out = lhs - rhs;                                \
     /* borrow: left operand is (unsigned) below the right */   \
     const UInt c_bit = (DATA_UTYPE)lhs < (DATA_UTYPE)rhs;      \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ lhs ^ rhs) & 0x10;               \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     /* overflow: operands differ in sign and so do lhs/out */  \
     const UInt o_bit                                           \
        = lshift((lhs ^ rhs) & (lhs ^ out),                     \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
187 
188 /*-------------------------------------------------------------*/
189 
/* ADC: the incoming carry is taken from CC_NDEP, the left operand is
   CC_DEP1 and the right operand is CC_DEP2 XOR the incoming carry.
   Expands to a block returning the six flags. */
#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt c_in = CC_NDEP & X86G_CC_MASK_C;                \
     const UInt lhs  = CC_DEP1;                                 \
     const UInt rhs  = CC_DEP2 ^ c_in;                          \
     const UInt out  = (lhs + rhs) + c_in;                      \
     /* with a carry in, an unchanged value also means wrap */  \
     const UInt c_bit                                           \
        = c_in ? ((DATA_UTYPE)out <= (DATA_UTYPE)lhs)           \
               : ((DATA_UTYPE)out <  (DATA_UTYPE)lhs);          \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ lhs ^ rhs) & 0x10;               \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     const UInt o_bit                                           \
        = lshift((lhs ^ rhs ^ -1) & (lhs ^ out),                \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
212 
213 /*-------------------------------------------------------------*/
214 
/* SBB: the incoming carry (borrow) is taken from CC_NDEP, the left
   operand is CC_DEP1 and the right operand is CC_DEP2 XOR the incoming
   carry.  Expands to a block returning the six flags. */
#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt c_in = CC_NDEP & X86G_CC_MASK_C;                \
     const UInt lhs  = CC_DEP1;                                 \
     const UInt rhs  = CC_DEP2 ^ c_in;                          \
     const UInt out  = (lhs - rhs) - c_in;                      \
     /* with a borrow in, equal operands also borrow */         \
     const UInt c_bit                                           \
        = c_in ? ((DATA_UTYPE)lhs <= (DATA_UTYPE)rhs)           \
               : ((DATA_UTYPE)lhs <  (DATA_UTYPE)rhs);          \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ lhs ^ rhs) & 0x10;               \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     const UInt o_bit                                           \
        = lshift((lhs ^ rhs) & (lhs ^ out),                     \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
237 
238 /*-------------------------------------------------------------*/
239 
/* Logic ops: CC_DEP1 is the result.  Carry, auxiliary-carry and
   overflow are all zero; parity, zero and sign come from the result. */
#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt c_bit = 0;                                      \
     const UInt a_bit = 0;                                      \
     const UInt o_bit = 0;                                      \
     const UInt p_bit = parity_table[(UChar)CC_DEP1];           \
     const UInt z_bit = ((DATA_UTYPE)CC_DEP1 == 0) << 6;        \
     const UInt s_bit = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;  \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
253 
254 /*-------------------------------------------------------------*/
255 
/* INC: CC_DEP1 is the result; the carry flag is passed through
   unchanged from CC_NDEP.  The operands are reconstructed as
   (result - 1) and 1. */
#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt out  = CC_DEP1;                                 \
     const UInt prev = out - 1;                                 \
     const UInt one  = 1;                                       \
     const UInt c_bit = CC_NDEP & X86G_CC_MASK_C;               \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ prev ^ one) & 0x10;              \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     /* overflow only when the result is exactly the sign bit */\
     const UInt o_bit = ((out & DATA_MASK) == SIGN_MASK) << 11; \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
273 
274 /*-------------------------------------------------------------*/
275 
/* DEC: CC_DEP1 is the result; the carry flag is passed through
   unchanged from CC_NDEP.  The operands are reconstructed as
   (result + 1) and 1. */
#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt out  = CC_DEP1;                                 \
     const UInt prev = out + 1;                                 \
     const UInt one  = 1;                                       \
     const UInt c_bit = CC_NDEP & X86G_CC_MASK_C;               \
     const UInt p_bit = parity_table[(UChar)out];               \
     const UInt a_bit = (out ^ prev ^ one) & 0x10;              \
     const UInt z_bit = ((DATA_UTYPE)out == 0) << 6;            \
     const UInt s_bit = lshift(out, 8 - DATA_BITS) & 0x80;      \
     /* overflow only when the result is (sign bit - 1) */      \
     const UInt o_bit                                           \
        = ((out & DATA_MASK) == ((UInt)SIGN_MASK - 1)) << 11;   \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
294 
295 /*-------------------------------------------------------------*/
296 
/* SHL: parity, zero and sign come from CC_DEP1; carry is bit
   (DATA_BITS-1) of CC_DEP2; overflow comes from CC_DEP2 ^ CC_DEP1. */
#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt c_bit                                           \
        = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     const UInt p_bit = parity_table[(UChar)CC_DEP1];           \
     const UInt a_bit = 0; /* undefined */                      \
     const UInt z_bit = ((DATA_UTYPE)CC_DEP1 == 0) << 6;        \
     const UInt s_bit = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;  \
     /* of is defined if shift count == 1 */                    \
     const UInt o_bit                                           \
        = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
312 
313 /*-------------------------------------------------------------*/
314 
/* SHR: parity, zero and sign come from CC_DEP1; carry is bit 0 of
   CC_DEP2; overflow comes from CC_DEP2 ^ CC_DEP1. */
#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt c_bit = CC_DEP2 & 1;                            \
     const UInt p_bit = parity_table[(UChar)CC_DEP1];           \
     const UInt a_bit = 0; /* undefined */                      \
     const UInt z_bit = ((DATA_UTYPE)CC_DEP1 == 0) << 6;        \
     const UInt s_bit = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;  \
     /* of is defined if shift count == 1 */                    \
     const UInt o_bit                                           \
        = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
330 
331 /*-------------------------------------------------------------*/
332 
/* ROL: new CF = lsb(result); new OF = msb(result) ^ lsb(result).
   CC_DEP1 holds the result and CC_NDEP the old flags; every flag
   other than O and C is carried over from CC_NDEP. */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt kept                                            \
        = CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C);         \
     const UInt c_bit = X86G_CC_MASK_C & CC_DEP1;               \
     const UInt o_bit                                           \
        = X86G_CC_MASK_O & (lshift(CC_DEP1,                     \
                                   11-(DATA_BITS-1))            \
                            ^ lshift(CC_DEP1, 11));             \
     return kept | c_bit | o_bit;                               \
   }                                                            \
}
347 
348 /*-------------------------------------------------------------*/
349 
/* ROR: new CF = msb(result); new OF = msb(result) ^ msb-1(result).
   CC_DEP1 holds the result and CC_NDEP the old flags; every flag
   other than O and C is carried over from CC_NDEP. */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { const UInt kept                                            \
        = CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C);         \
     const UInt c_bit                                           \
        = X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1));          \
     const UInt o_bit                                           \
        = X86G_CC_MASK_O & (lshift(CC_DEP1,                     \
                                   11-(DATA_BITS-1))            \
                            ^ lshift(CC_DEP1,                   \
                                     11-(DATA_BITS-1)+1));      \
     return kept | c_bit | o_bit;                               \
   }                                                            \
}
364 
365 /*-------------------------------------------------------------*/
366 
/* Unsigned multiply: CC_DEP1 and CC_DEP2 are the operands.
   DATA_UTYPE / NARROWtoU give the operand type and the function that
   narrows to it; DATA_U2TYPE / NARROWto2U do the same for the
   double-width product.  CF and OF are both set when the high half
   of the product is non-zero.

   The low half is computed by multiplying in UInt rather than in
   DATA_UTYPE: operands narrower than int would otherwise be promoted
   to (signed) int, and e.g. 0xFFFF * 0xFFFF overflows a 32-bit int,
   which is undefined behaviour.  Unsigned arithmetic wraps, and the
   low DATA_BITS bits of the product are unaffected. */
#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((UInt)((DATA_UTYPE)CC_DEP1))              \
                     * ((UInt)((DATA_UTYPE)CC_DEP2)) );         \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
390 
391 /*-------------------------------------------------------------*/
392 
/* Signed multiply: CC_DEP1 and CC_DEP2 are the operands.
   DATA_STYPE / NARROWtoS give the operand type and the function that
   narrows to it; DATA_S2TYPE / NARROWto2S do the same for the
   double-width product.  CF and OF are both set when the high half
   of the product differs from the sign-replication of the low half. */
#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { DATA_STYPE  lo_half                                        \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE wide                                           \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     DATA_STYPE  hi_half = NARROWtoS(wide >>/*s*/ DATA_BITS);   \
     const UInt c_bit                                           \
        = (hi_half != (lo_half >>/*s*/ (DATA_BITS-1)));         \
     const UInt p_bit = parity_table[(UChar)lo_half];           \
     const UInt a_bit = 0; /* undefined */                      \
     const UInt z_bit = (lo_half == 0) << 6;                    \
     const UInt s_bit = lshift(lo_half, 8 - DATA_BITS) & 0x80;  \
     const UInt o_bit = c_bit << 11;                            \
     return c_bit | p_bit | a_bit | z_bit | s_bit | o_bit;      \
   }                                                            \
}
416 
417 
418 #if PROFILE_EFLAGS
419 
420 static Bool initted     = False;
421 
422 /* C flag, fast route */
423 static UInt tabc_fast[X86G_CC_OP_NUMBER];
424 /* C flag, slow route */
425 static UInt tabc_slow[X86G_CC_OP_NUMBER];
426 /* table for calculate_cond */
427 static UInt tab_cond[X86G_CC_OP_NUMBER][16];
428 /* total entry counts for calc_all, calc_c, calc_cond. */
429 static UInt n_calc_all  = 0;
430 static UInt n_calc_c    = 0;
431 static UInt n_calc_cond = 0;
432 
433 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
434 
435 
showCounts(void)436 static void showCounts ( void )
437 {
438    Int op, co;
439    HChar ch;
440    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
441               n_calc_all, n_calc_cond, n_calc_c);
442 
443    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
444               "    S   NS    P   NP    L   NL   LE  NLE\n");
445    vex_printf("     -----------------------------------------------------"
446               "----------------------------------------\n");
447    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
448 
449       ch = ' ';
450       if (op > 0 && (op-1) % 3 == 0)
451          ch = 'B';
452       if (op > 0 && (op-1) % 3 == 1)
453          ch = 'W';
454       if (op > 0 && (op-1) % 3 == 2)
455          ch = 'L';
456 
457       vex_printf("%2d%c: ", op, ch);
458       vex_printf("%6u ", tabc_slow[op]);
459       vex_printf("%6u ", tabc_fast[op]);
460       for (co = 0; co < 16; co++) {
461          Int n = tab_cond[op][co];
462          if (n >= 1000) {
463             vex_printf(" %3dK", n / 1000);
464          } else
465          if (n >= 0) {
466             vex_printf(" %3d ", n );
467          } else {
468             vex_printf("     ");
469          }
470       }
471       vex_printf("\n");
472    }
473    vex_printf("\n");
474 }
475 
initCounts(void)476 static void initCounts ( void )
477 {
478    Int op, co;
479    initted = True;
480    for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
481       tabc_fast[op] = tabc_slow[op] = 0;
482       for (co = 0; co < 16; co++)
483          tab_cond[op][co] = 0;
484    }
485 }
486 
487 #endif /* PROFILE_EFLAGS */
488 
489 
490 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
491 /* Calculate all the 6 flags from the supplied thunk parameters.
492    Worker function, not directly called from generated code. */
493 static
x86g_calculate_eflags_all_WRK(UInt cc_op,UInt cc_dep1_formal,UInt cc_dep2_formal,UInt cc_ndep_formal)494 UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
495                                      UInt cc_dep1_formal,
496                                      UInt cc_dep2_formal,
497                                      UInt cc_ndep_formal )
498 {
499    switch (cc_op) {
500       case X86G_CC_OP_COPY:
501          return cc_dep1_formal
502                 & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
503                    | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
504 
505       case X86G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
506       case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
507       case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
508 
509       case X86G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
510       case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
511       case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
512 
513       case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
514       case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
515       case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
516 
517       case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
518       case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
519       case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
520 
521       case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
522       case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
523       case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
524 
525       case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
526       case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
527       case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
528 
529       case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
530       case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
531       case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
532 
533       case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
534       case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
535       case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
536 
537       case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
538       case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
539       case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
540 
541       case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
542       case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
543       case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
544 
545       case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
546       case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
547       case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
548 
549       case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
550                                                 UShort, toUShort );
551       case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
552                                                 UInt,   toUInt );
553       case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
554                                                 ULong,  idULong );
555 
556       case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
557                                                 Short,  toUShort );
558       case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
559                                                 Int,    toUInt   );
560       case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
561                                                 Long,   idULong );
562 
563       default:
564          /* shouldn't really make these calls from generated code */
565          vex_printf("x86g_calculate_eflags_all_WRK(X86)"
566                     "( %u, 0x%x, 0x%x, 0x%x )\n",
567                     cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
568          vpanic("x86g_calculate_eflags_all_WRK(X86)");
569    }
570 }
571 
572 
573 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
574 /* Calculate all the 6 flags from the supplied thunk parameters. */
x86g_calculate_eflags_all(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)575 UInt x86g_calculate_eflags_all ( UInt cc_op,
576                                  UInt cc_dep1,
577                                  UInt cc_dep2,
578                                  UInt cc_ndep )
579 {
580 #  if PROFILE_EFLAGS
581    if (!initted) initCounts();
582    n_calc_all++;
583    if (SHOW_COUNTS_NOW) showCounts();
584 #  endif
585    return
586       x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
587 }
588 
589 
590 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
591 /* Calculate just the carry flag from the supplied thunk parameters. */
592 VEX_REGPARM(3)
x86g_calculate_eflags_c(UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)593 UInt x86g_calculate_eflags_c ( UInt cc_op,
594                                UInt cc_dep1,
595                                UInt cc_dep2,
596                                UInt cc_ndep )
597 {
598 #  if PROFILE_EFLAGS
599    if (!initted) initCounts();
600    n_calc_c++;
601    tabc_fast[cc_op]++;
602    if (SHOW_COUNTS_NOW) showCounts();
603 #  endif
604 
605    /* Fast-case some common ones. */
606    switch (cc_op) {
607       case X86G_CC_OP_LOGICL:
608       case X86G_CC_OP_LOGICW:
609       case X86G_CC_OP_LOGICB:
610          return 0;
611       case X86G_CC_OP_SUBL:
612          return ((UInt)cc_dep1) < ((UInt)cc_dep2)
613                    ? X86G_CC_MASK_C : 0;
614       case X86G_CC_OP_SUBW:
615          return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
616                    ? X86G_CC_MASK_C : 0;
617       case X86G_CC_OP_SUBB:
618          return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
619                    ? X86G_CC_MASK_C : 0;
620       case X86G_CC_OP_INCL:
621       case X86G_CC_OP_DECL:
622          return cc_ndep & X86G_CC_MASK_C;
623       default:
624          break;
625    }
626 
627 #  if PROFILE_EFLAGS
628    tabc_fast[cc_op]--;
629    tabc_slow[cc_op]++;
630 #  endif
631 
632    return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
633           & X86G_CC_MASK_C;
634 }
635 
636 
637 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
638 /* returns 1 or 0 */
x86g_calculate_condition(UInt cond,UInt cc_op,UInt cc_dep1,UInt cc_dep2,UInt cc_ndep)639 UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
640                                 UInt cc_op,
641                                 UInt cc_dep1,
642                                 UInt cc_dep2,
643                                 UInt cc_ndep )
644 {
645    UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
646                                                cc_dep2, cc_ndep);
647    UInt of,sf,zf,cf,pf;
648    UInt inv = cond & 1;
649 
650 #  if PROFILE_EFLAGS
651    if (!initted) initCounts();
652    tab_cond[cc_op][cond]++;
653    n_calc_cond++;
654    if (SHOW_COUNTS_NOW) showCounts();
655 #  endif
656 
657    switch (cond) {
658       case X86CondNO:
659       case X86CondO: /* OF == 1 */
660          of = eflags >> X86G_CC_SHIFT_O;
661          return 1 & (inv ^ of);
662 
663       case X86CondNZ:
664       case X86CondZ: /* ZF == 1 */
665          zf = eflags >> X86G_CC_SHIFT_Z;
666          return 1 & (inv ^ zf);
667 
668       case X86CondNB:
669       case X86CondB: /* CF == 1 */
670          cf = eflags >> X86G_CC_SHIFT_C;
671          return 1 & (inv ^ cf);
672          break;
673 
674       case X86CondNBE:
675       case X86CondBE: /* (CF or ZF) == 1 */
676          cf = eflags >> X86G_CC_SHIFT_C;
677          zf = eflags >> X86G_CC_SHIFT_Z;
678          return 1 & (inv ^ (cf | zf));
679          break;
680 
681       case X86CondNS:
682       case X86CondS: /* SF == 1 */
683          sf = eflags >> X86G_CC_SHIFT_S;
684          return 1 & (inv ^ sf);
685 
686       case X86CondNP:
687       case X86CondP: /* PF == 1 */
688          pf = eflags >> X86G_CC_SHIFT_P;
689          return 1 & (inv ^ pf);
690 
691       case X86CondNL:
692       case X86CondL: /* (SF xor OF) == 1 */
693          sf = eflags >> X86G_CC_SHIFT_S;
694          of = eflags >> X86G_CC_SHIFT_O;
695          return 1 & (inv ^ (sf ^ of));
696          break;
697 
698       case X86CondNLE:
699       case X86CondLE: /* ((SF xor OF) or ZF)  == 1 */
700          sf = eflags >> X86G_CC_SHIFT_S;
701          of = eflags >> X86G_CC_SHIFT_O;
702          zf = eflags >> X86G_CC_SHIFT_Z;
703          return 1 & (inv ^ ((sf ^ of) | zf));
704          break;
705 
706       default:
707          /* shouldn't really make these calls from generated code */
708          vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
709                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
710          vpanic("x86g_calculate_condition");
711    }
712 }
713 
714 
715 /* VISIBLE TO LIBVEX CLIENT */
LibVEX_GuestX86_get_eflags(const VexGuestX86State * vex_state)716 UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
717 {
718    UInt eflags = x86g_calculate_eflags_all_WRK(
719                     vex_state->guest_CC_OP,
720                     vex_state->guest_CC_DEP1,
721                     vex_state->guest_CC_DEP2,
722                     vex_state->guest_CC_NDEP
723                  );
724    UInt dflag = vex_state->guest_DFLAG;
725    vassert(dflag == 1 || dflag == 0xFFFFFFFF);
726    if (dflag == 0xFFFFFFFF)
727       eflags |= X86G_CC_MASK_D;
728    if (vex_state->guest_IDFLAG == 1)
729       eflags |= X86G_CC_MASK_ID;
730    if (vex_state->guest_ACFLAG == 1)
731       eflags |= X86G_CC_MASK_AC;
732 
733    return eflags;
734 }
735 
736 /* VISIBLE TO LIBVEX CLIENT */
737 void
LibVEX_GuestX86_put_eflags(UInt eflags,VexGuestX86State * vex_state)738 LibVEX_GuestX86_put_eflags ( UInt eflags,
739                              /*MOD*/VexGuestX86State* vex_state )
740 {
741    /* D flag */
742    if (eflags & X86G_CC_MASK_D) {
743       vex_state->guest_DFLAG = 0xFFFFFFFF;
744       eflags &= ~X86G_CC_MASK_D;
745    }
746    else
747       vex_state->guest_DFLAG = 1;
748 
749    /* ID flag */
750    if (eflags & X86G_CC_MASK_ID) {
751       vex_state->guest_IDFLAG = 1;
752       eflags &= ~X86G_CC_MASK_ID;
753    }
754    else
755       vex_state->guest_IDFLAG = 0;
756 
757    /* AC flag */
758    if (eflags & X86G_CC_MASK_AC) {
759       vex_state->guest_ACFLAG = 1;
760       eflags &= ~X86G_CC_MASK_AC;
761    }
762    else
763       vex_state->guest_ACFLAG = 0;
764 
765    UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
766                   X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
767    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
768    vex_state->guest_CC_DEP1 = eflags & cc_mask;
769    vex_state->guest_CC_DEP2 = 0;
770    vex_state->guest_CC_NDEP = 0;
771 }
772 
773 /* VISIBLE TO LIBVEX CLIENT */
774 void
LibVEX_GuestX86_put_eflag_c(UInt new_carry_flag,VexGuestX86State * vex_state)775 LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
776                               /*MOD*/VexGuestX86State* vex_state )
777 {
778    UInt oszacp = x86g_calculate_eflags_all_WRK(
779                     vex_state->guest_CC_OP,
780                     vex_state->guest_CC_DEP1,
781                     vex_state->guest_CC_DEP2,
782                     vex_state->guest_CC_NDEP
783                  );
784    if (new_carry_flag & 1) {
785       oszacp |= X86G_CC_MASK_C;
786    } else {
787       oszacp &= ~X86G_CC_MASK_C;
788    }
789    vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
790    vex_state->guest_CC_DEP1 = oszacp;
791    vex_state->guest_CC_DEP2 = 0;
792    vex_state->guest_CC_NDEP = 0;
793 }
794 
795 
796 /*---------------------------------------------------------------*/
797 /*--- %eflags translation-time function specialisers.         ---*/
798 /*--- These help iropt specialise calls to the above run-time ---*/
799 /*--- %eflags functions.                                      ---*/
800 /*---------------------------------------------------------------*/
801 
802 /* Used by the optimiser to try specialisations.  Returns an
803    equivalent expression, or NULL if none. */
804 
isU32(IRExpr * e,UInt n)805 static inline Bool isU32 ( IRExpr* e, UInt n )
806 {
807    return
808       toBool( e->tag == Iex_Const
809               && e->Iex.Const.con->tag == Ico_U32
810               && e->Iex.Const.con->Ico.U32 == n );
811 }
812 
/* Try to replace a call to one of the run-time %eflags helpers with
   an equivalent inline IR expression.

   function_name  name of the helper being called; the names handled
                  here are "x86g_calculate_condition",
                  "x86g_calculate_eflags_c" and
                  "x86g_calculate_eflags_all".
   args           NULL-terminated array of the call's argument
                  expressions.
   precedingStmts, n_precedingStmts
                  not referenced by this function.

   Returns a newly built IRExpr when the (helper, cc_op, cond)
   combination matches one of the patterns below, and NULL
   otherwise.  Patterns only fire when cc_op (and cond, where
   relevant) are U32 constants, as tested by isU32. */
guest_x86_spechelper(const HChar * function_name,IRExpr ** args,IRStmt ** precedingStmts,Int n_precedingStmts)813 IRExpr* guest_x86_spechelper ( const HChar* function_name,
814                                IRExpr** args,
815                                IRStmt** precedingStmts,
816                                Int      n_precedingStmts )
817 {
   /* Local shorthands for building IR; #undef'd at the end of the
      function. */
818 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
819 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
820 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
821 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
822 
   /* Count the arguments up to the terminating NULL entry. */
823    Int i, arity = 0;
824    for (i = 0; args[i]; i++)
825       arity++;
826 #  if 0
827    vex_printf("spec request:\n");
828    vex_printf("   %s  ", function_name);
829    for (i = 0; i < arity; i++) {
830       vex_printf("  ");
831       ppIRExpr(args[i]);
832    }
833    vex_printf("\n");
834 #  endif
835 
836    /* --------- specialising "x86g_calculate_condition" --------- */
837 
838    if (vex_streq(function_name, "x86g_calculate_condition")) {
839       /* specialise calls to above "calculate condition" function */
840       IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
841       vassert(arity == 5);
      /* Only the first four of the five arguments are examined. */
842       cond    = args[0];
843       cc_op   = args[1];
844       cc_dep1 = args[2];
845       cc_dep2 = args[3];
846 
847       /*---------------- ADDL ----------------*/
848 
849       if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
850          /* long add, then Z --> test (dst+src == 0) */
851          return unop(Iop_1Uto32,
852                      binop(Iop_CmpEQ32,
853                            binop(Iop_Add32, cc_dep1, cc_dep2),
854                            mkU32(0)));
855       }
856 
857       /*---------------- SUBL ----------------*/
858 
859       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
860          /* long sub/cmp, then Z --> test dst==src */
861          return unop(Iop_1Uto32,
862                      binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
863       }
864       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
865          /* long sub/cmp, then NZ --> test dst!=src */
866          return unop(Iop_1Uto32,
867                      binop(Iop_CmpNE32, cc_dep1, cc_dep2));
868       }
869 
870       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
871          /* long sub/cmp, then L (signed less than)
872             --> test dst <s src */
873          return unop(Iop_1Uto32,
874                      binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
875       }
876       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
877          /* long sub/cmp, then NL (signed greater than or equal)
878             --> test !(dst <s src) */
         /* The negation is done by XORing the 0/1 result with 1. */
879          return binop(Iop_Xor32,
880                       unop(Iop_1Uto32,
881                            binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
882                       mkU32(1));
883       }
884 
885       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
886          /* long sub/cmp, then LE (signed less than or equal)
887             --> test dst <=s src */
888          return unop(Iop_1Uto32,
889                      binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
890       }
891       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
892          /* long sub/cmp, then NLE (signed not less than or equal)
893             --> test dst >s src
894             --> test !(dst <=s src) */
895          return binop(Iop_Xor32,
896                       unop(Iop_1Uto32,
897                            binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
898                       mkU32(1));
899       }
900 
901       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
902          /* long sub/cmp, then BE (unsigned less than or equal)
903             --> test dst <=u src */
904          return unop(Iop_1Uto32,
905                      binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
906       }
907       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
908          /* long sub/cmp, then NBE (unsigned greater than)
909             --> test !(dst <=u src) */
910          return binop(Iop_Xor32,
911                       unop(Iop_1Uto32,
912                            binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
913                       mkU32(1));
914       }
915 
916       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
917          /* long sub/cmp, then B (unsigned less than)
918             --> test dst <u src */
919          return unop(Iop_1Uto32,
920                      binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
921       }
922       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
923          /* long sub/cmp, then NB (unsigned greater than or equal)
924             --> test !(dst <u src) */
925          return binop(Iop_Xor32,
926                       unop(Iop_1Uto32,
927                            binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
928                       mkU32(1));
929       }
930 
931       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
932          /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
933          return unop(Iop_1Uto32,
934                      binop(Iop_CmpLT32S,
935                            binop(Iop_Sub32, cc_dep1, cc_dep2),
936                            mkU32(0)));
937       }
938       if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
939          /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
940          return binop(Iop_Xor32,
941                       unop(Iop_1Uto32,
942                            binop(Iop_CmpLT32S,
943                                  binop(Iop_Sub32, cc_dep1, cc_dep2),
944                                  mkU32(0))),
945                       mkU32(1));
946       }
947 
948       /*---------------- SUBW ----------------*/
949 
950       if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
951          /* word sub/cmp, then Z --> test dst==src */
952          return unop(Iop_1Uto32,
953                      binop(Iop_CmpEQ16,
954                            unop(Iop_32to16,cc_dep1),
955                            unop(Iop_32to16,cc_dep2)));
956       }
957       if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
958          /* word sub/cmp, then NZ --> test dst!=src */
959          return unop(Iop_1Uto32,
960                      binop(Iop_CmpNE16,
961                            unop(Iop_32to16,cc_dep1),
962                            unop(Iop_32to16,cc_dep2)));
963       }
964 
965       /*---------------- SUBB ----------------*/
966 
967       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
968          /* byte sub/cmp, then Z --> test dst==src */
969          return unop(Iop_1Uto32,
970                      binop(Iop_CmpEQ8,
971                            unop(Iop_32to8,cc_dep1),
972                            unop(Iop_32to8,cc_dep2)));
973       }
974       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
975          /* byte sub/cmp, then NZ --> test dst!=src */
976          return unop(Iop_1Uto32,
977                      binop(Iop_CmpNE8,
978                            unop(Iop_32to8,cc_dep1),
979                            unop(Iop_32to8,cc_dep2)));
980       }
981 
982       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
983          /* byte sub/cmp, then NBE (unsigned greater than)
984             --> test src <u dst */
985          /* Note, args are opposite way round from the usual */
         /* Both operands are masked to their low 8 bits so the 32-bit
            unsigned compare only sees the byte values. */
986          return unop(Iop_1Uto32,
987                      binop(Iop_CmpLT32U,
988                            binop(Iop_And32,cc_dep2,mkU32(0xFF)),
989 			   binop(Iop_And32,cc_dep1,mkU32(0xFF))));
990       }
991 
992       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
993                                         && isU32(cc_dep2, 0)) {
994          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
995                                          --> test dst <s 0
996                                          --> (UInt)dst[7]
997             This is yet another scheme by which gcc figures out if the
998             top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
999          /* Note: isU32(cc_dep2, 0) is correct, even though this is
1000             for an 8-bit comparison, since the args to the helper
1001             function are always U32s. */
1002          return binop(Iop_And32,
1003                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
1004                       mkU32(1));
1005       }
1006       if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
1007                                         && isU32(cc_dep2, 0)) {
1008          /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1009                                           --> test !(dst <s 0)
1010                                           --> (UInt) !dst[7]
1011          */
1012          return binop(Iop_Xor32,
1013                       binop(Iop_And32,
1014                             binop(Iop_Shr32,cc_dep1,mkU8(7)),
1015                             mkU32(1)),
1016                 mkU32(1));
1017       }
1018 
1019       /*---------------- LOGICL ----------------*/
1020 
1021       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
1022          /* long and/or/xor, then Z --> test dst==0 */
1023          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1024       }
1025       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
1026          /* long and/or/xor, then NZ --> test dst!=0 */
1027          return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
1028       }
1029 
1030       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
1031          /* long and/or/xor, then LE
1032             This is pretty subtle.  LOGIC sets SF and ZF according to the
1033             result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
1034             OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1035             the result is <=signed 0.  Hence ...
1036          */
1037          return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
1038       }
1039 
1040       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
1041          /* long and/or/xor, then BE
1042             LOGIC sets ZF according to the result and makes CF be zero.
1043             BE computes (CF | ZF), but CF is zero, so this reduces to ZF
1044             -- which will be 1 iff the result is zero.  Hence ...
1045          */
1046          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1047       }
1048 
1049       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
1050          /* see comment below for (LOGICB, CondS) */
1051          /* long and/or/xor, then S --> (UInt)result[31] */
1052          return binop(Iop_And32,
1053                       binop(Iop_Shr32,cc_dep1,mkU8(31)),
1054                       mkU32(1));
1055       }
1056       if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
1057          /* see comment below for (LOGICB, CondNS) */
1058          /* long and/or/xor, then NS --> (UInt) ~ result[31] */
1059          return binop(Iop_Xor32,
1060                 binop(Iop_And32,
1061                       binop(Iop_Shr32,cc_dep1,mkU8(31)),
1062                       mkU32(1)),
1063                 mkU32(1));
1064       }
1065 
1066       /*---------------- LOGICW ----------------*/
1067 
1068       if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
1069          /* word and/or/xor, then Z --> test dst==0 */
1070          return unop(Iop_1Uto32,
1071                      binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
1072                                         mkU32(0)));
1073       }
1074 
1075       if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
1076          /* see comment below for (LOGICB, CondS) */
1077          /* word and/or/xor, then S --> (UInt)result[15] */
1078          return binop(Iop_And32,
1079                       binop(Iop_Shr32,cc_dep1,mkU8(15)),
1080                       mkU32(1));
1081       }
1082 
1083       /*---------------- LOGICB ----------------*/
1084 
1085       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
1086          /* byte and/or/xor, then Z --> test dst==0 */
1087          return unop(Iop_1Uto32,
1088                      binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
1089                                         mkU32(0)));
1090       }
1091       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
1092          /* byte and/or/xor, then NZ --> test dst!=0 */
1093          /* b9ac9:       84 c0                   test   %al,%al
1094             b9acb:       75 0d                   jne    b9ada */
1095          return unop(Iop_1Uto32,
1096                      binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
1097                                         mkU32(0)));
1098       }
1099 
1100       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
1101          /* this is an idiom gcc sometimes uses to find out if the top
1102             bit of a byte register is set: eg testb %al,%al; js ..
1103             Since it just depends on the top bit of the byte, extract
1104             that bit and explicitly get rid of all the rest.  This
1105             helps memcheck avoid false positives in the case where any
1106             of the other bits in the byte are undefined. */
1107          /* byte and/or/xor, then S --> (UInt)result[7] */
1108          return binop(Iop_And32,
1109                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
1110                       mkU32(1));
1111       }
1112       if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
1113          /* ditto, for negation-of-S. */
1114          /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
1115          return binop(Iop_Xor32,
1116                 binop(Iop_And32,
1117                       binop(Iop_Shr32,cc_dep1,mkU8(7)),
1118                       mkU32(1)),
1119                 mkU32(1));
1120       }
1121 
1122       /*---------------- DECL ----------------*/
1123 
1124       if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
1125          /* dec L, then Z --> test dst == 0 */
1126          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1127       }
1128 
1129       if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
1130          /* dec L, then S --> compare DST <s 0 */
1131          return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
1132       }
1133 
1134       /*---------------- DECW ----------------*/
1135 
1136       if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
1137          /* dec W, then Z --> test dst == 0 */
         /* Shifting left by 16 discards the upper half of dep1, so
            the compare against zero only depends on the low 16 bits. */
1138          return unop(Iop_1Uto32,
1139                      binop(Iop_CmpEQ32,
1140                            binop(Iop_Shl32,cc_dep1,mkU8(16)),
1141                            mkU32(0)));
1142       }
1143 
1144       /*---------------- INCW ----------------*/
1145 
1146       if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
1147          /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
1148          /* inc W, then Z --> test dst == 0 */
         /* Same low-16-bits-only test as for DECW above. */
1149          return unop(Iop_1Uto32,
1150                      binop(Iop_CmpEQ32,
1151                            binop(Iop_Shl32,cc_dep1,mkU8(16)),
1152                            mkU32(0)));
1153       }
1154 
1155       /*---------------- SHRL ----------------*/
1156 
1157       if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
1158          /* SHRL, then Z --> test dep1 == 0 */
1159          return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
1160       }
1161 
1162       /*---------------- COPY ----------------*/
1163       /* This can happen, as a result of x87 FP compares: "fcom ... ;
1164          fnstsw %ax ; sahf ; jbe" for example. */
1165 
1166       if (isU32(cc_op, X86G_CC_OP_COPY) &&
1167           (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
1168          /* COPY, then BE --> extract C and Z from dep1, and test
1169             (C or Z) == 1. */
1170          /* COPY, then NBE --> extract C and Z from dep1, and test
1171             (C or Z) == 0. */
         /* nnn is the value the extracted bit is compared against:
            1 for the positive condition, 0 for its negation. */
1172          UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
1173          return
1174             unop(
1175                Iop_1Uto32,
1176                binop(
1177                   Iop_CmpEQ32,
1178                   binop(
1179                      Iop_And32,
1180                      binop(
1181                         Iop_Or32,
1182                         binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1183                         binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
1184                      ),
1185                      mkU32(1)
1186                   ),
1187                   mkU32(nnn)
1188                )
1189             );
1190       }
1191 
1192       if (isU32(cc_op, X86G_CC_OP_COPY)
1193           && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
1194          /* COPY, then B --> extract C from dep1, and test (C == 1). */
1195          /* COPY, then NB --> extract C from dep1, and test (C == 0). */
1196          UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
1197          return
1198             unop(
1199                Iop_1Uto32,
1200                binop(
1201                   Iop_CmpEQ32,
1202                   binop(
1203                      Iop_And32,
1204                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1205                      mkU32(1)
1206                   ),
1207                   mkU32(nnn)
1208                )
1209             );
1210       }
1211 
1212       if (isU32(cc_op, X86G_CC_OP_COPY)
1213           && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
1214          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1215          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1216          UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
1217          return
1218             unop(
1219                Iop_1Uto32,
1220                binop(
1221                   Iop_CmpEQ32,
1222                   binop(
1223                      Iop_And32,
1224                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
1225                      mkU32(1)
1226                   ),
1227                   mkU32(nnn)
1228                )
1229             );
1230       }
1231 
1232       if (isU32(cc_op, X86G_CC_OP_COPY)
1233           && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
1234          /* COPY, then P --> extract P from dep1, and test (P == 1). */
1235          /* COPY, then NP --> extract P from dep1, and test (P == 0). */
1236          UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
1237          return
1238             unop(
1239                Iop_1Uto32,
1240                binop(
1241                   Iop_CmpEQ32,
1242                   binop(
1243                      Iop_And32,
1244                      binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
1245                      mkU32(1)
1246                   ),
1247                   mkU32(nnn)
1248                )
1249             );
1250       }
1251 
      /* No pattern matched for this (cc_op, cond) pair. */
1252       return NULL;
1253    }
1254 
1255    /* --------- specialising "x86g_calculate_eflags_c" --------- */
1256 
1257    if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
1258       /* specialise calls to above "calculate_eflags_c" function */
1259       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1260       vassert(arity == 4);
1261       cc_op   = args[0];
1262       cc_dep1 = args[1];
1263       cc_dep2 = args[2];
1264       cc_ndep = args[3];
1265 
1266       if (isU32(cc_op, X86G_CC_OP_SUBL)) {
1267          /* C after sub denotes unsigned less than */
1268          return unop(Iop_1Uto32,
1269                      binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
1270       }
1271       if (isU32(cc_op, X86G_CC_OP_SUBB)) {
1272          /* C after sub denotes unsigned less than */
         /* Operands are masked to 8 bits before the 32-bit compare. */
1273          return unop(Iop_1Uto32,
1274                      binop(Iop_CmpLT32U,
1275                            binop(Iop_And32,cc_dep1,mkU32(0xFF)),
1276                            binop(Iop_And32,cc_dep2,mkU32(0xFF))));
1277       }
1278       if (isU32(cc_op, X86G_CC_OP_LOGICL)
1279           || isU32(cc_op, X86G_CC_OP_LOGICW)
1280           || isU32(cc_op, X86G_CC_OP_LOGICB)) {
1281          /* cflag after logic is zero */
1282          return mkU32(0);
1283       }
1284       if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
1285          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1286          return cc_ndep;
1287       }
1288       if (isU32(cc_op, X86G_CC_OP_COPY)) {
1289          /* cflag after COPY is stored in DEP1. */
1290          return
1291             binop(
1292                Iop_And32,
1293                binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
1294                mkU32(1)
1295             );
1296       }
1297       if (isU32(cc_op, X86G_CC_OP_ADDL)) {
1298          /* C after add denotes sum <u either arg */
1299          return unop(Iop_1Uto32,
1300                      binop(Iop_CmpLT32U,
1301                            binop(Iop_Add32, cc_dep1, cc_dep2),
1302                            cc_dep1));
1303       }
1304       // ATC, requires verification, no test case known
1305       //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
1306       //   /* C after signed widening multiply denotes the case where
1307       //      the top half of the result isn't simply the sign extension
1308       //      of the bottom half (iow the result doesn't fit completely
1309       //      in the bottom half).  Hence:
1310       //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
1311       //      where 'x' denotes signed widening multiply.*/
1312       //   return
1313       //      unop(Iop_1Uto32,
1314       //           binop(Iop_CmpNE32,
1315       //                 unop(Iop_64HIto32,
1316       //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
1317       //                 binop(Iop_Sar32,
1318       //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
1319       //}
1320 #     if 0
1321       if (cc_op->tag == Iex_Const) {
1322          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1323       }
1324 #     endif
1325 
1326       return NULL;
1327    }
1328 
1329    /* --------- specialising "x86g_calculate_eflags_all" --------- */
1330 
1331    if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
1332       /* specialise calls to above "calculate_eflags_all" function */
1333       IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
1334       vassert(arity == 4);
1335       cc_op   = args[0];
1336       cc_dep1 = args[1];
1337       /* cc_dep2 = args[2]; */
1338       /* cc_ndep = args[3]; */
1339 
1340       if (isU32(cc_op, X86G_CC_OP_COPY)) {
1341          /* eflags after COPY are stored in DEP1. */
1342          return
1343             binop(
1344                Iop_And32,
1345                cc_dep1,
1346                mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
1347                      | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
1348             );
1349       }
1350       return NULL;
1351    }
1352 
1353 #  undef unop
1354 #  undef binop
1355 #  undef mkU32
1356 #  undef mkU8
1357 
   /* function_name is not one of the helpers handled above. */
1358    return NULL;
1359 }
1360 
1361 
1362 /*---------------------------------------------------------------*/
1363 /*--- Supporting functions for x87 FPU activities.            ---*/
1364 /*---------------------------------------------------------------*/
1365 
host_is_little_endian(void)1366 static inline Bool host_is_little_endian ( void )
1367 {
1368    UInt x = 0x76543210;
1369    UChar* p = (UChar*)(&x);
1370    return toBool(*p == 0x10);
1371 }
1372 
1373 /* 80 and 64-bit floating point formats:
1374 
1375    80-bit:
1376 
1377     S  0       0-------0      zero
1378     S  0       0X------X      denormals
1379     S  1-7FFE  1X------X      normals (all normals have leading 1)
1380     S  7FFF    10------0      infinity
1381     S  7FFF    10X-----X      snan
1382     S  7FFF    11X-----X      qnan
1383 
1384    S is the sign bit.  For runs X----X, at least one of the Xs must be
1385    nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
1386    there is an explicitly represented leading 1, and a sign bit,
1387    giving 80 in total.
1388 
1389    64-bit avoids the confusion of an explicitly represented leading 1
1390    and so is simpler:
1391 
1392     S  0      0------0   zero
1393     S  0      X------X   denormals
1394     S  1-7FE  any        normals
1395     S  7FF    0------0   infinity
1396     S  7FF    0X-----X   snan
1397     S  7FF    1X-----X   qnan
1398 
1399    Exponent is 11 bits, fractional part is 52 bits, and there is a
1400    sign bit, giving 64 in total.
1401 */
1402 
1403 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1404 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_calculate_FXAM(UInt tag,ULong dbl)1405 UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
1406 {
1407    Bool   mantissaIsZero;
1408    Int    bexp;
1409    UChar  sign;
1410    UChar* f64;
1411 
1412    vassert(host_is_little_endian());
1413 
1414    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1415 
1416    f64  = (UChar*)(&dbl);
1417    sign = toUChar( (f64[7] >> 7) & 1 );
1418 
1419    /* First off, if the tag indicates the register was empty,
1420       return 1,0,sign,1 */
1421    if (tag == 0) {
1422       /* vex_printf("Empty\n"); */
1423       return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
1424                                  | X86G_FC_MASK_C0;
1425    }
1426 
1427    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1428    bexp &= 0x7FF;
1429 
1430    mantissaIsZero
1431       = toBool(
1432            (f64[6] & 0x0F) == 0
1433            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1434         );
1435 
1436    /* If both exponent and mantissa are zero, the value is zero.
1437       Return 1,0,sign,0. */
1438    if (bexp == 0 && mantissaIsZero) {
1439       /* vex_printf("Zero\n"); */
1440       return X86G_FC_MASK_C3 | 0
1441                              | (sign << X86G_FC_SHIFT_C1) | 0;
1442    }
1443 
1444    /* If exponent is zero but mantissa isn't, it's a denormal.
1445       Return 1,1,sign,0. */
1446    if (bexp == 0 && !mantissaIsZero) {
1447       /* vex_printf("Denormal\n"); */
1448       return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
1449                              | (sign << X86G_FC_SHIFT_C1) | 0;
1450    }
1451 
1452    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1453       Return 0,1,sign,1. */
1454    if (bexp == 0x7FF && mantissaIsZero) {
1455       /* vex_printf("Inf\n"); */
1456       return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
1457                                  | X86G_FC_MASK_C0;
1458    }
1459 
1460    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1461       Return 0,0,sign,1. */
1462    if (bexp == 0x7FF && !mantissaIsZero) {
1463       /* vex_printf("NaN\n"); */
1464       return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
1465    }
1466 
1467    /* Uh, ok, we give up.  It must be a normal finite number.
1468       Return 0,1,sign,0.
1469    */
1470    /* vex_printf("normal\n"); */
1471    return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
1472 }
1473 
1474 
1475 /* CALLED FROM GENERATED CODE */
1476 /* DIRTY HELPER (reads guest memory) */
x86g_dirtyhelper_loadF80le(Addr addrU)1477 ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
1478 {
1479    ULong f64;
1480    convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
1481    return f64;
1482 }
1483 
1484 /* CALLED FROM GENERATED CODE */
1485 /* DIRTY HELPER (writes guest memory) */
x86g_dirtyhelper_storeF80le(Addr addrU,ULong f64)1486 void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
1487 {
1488    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
1489 }
1490 
1491 
1492 /*----------------------------------------------*/
1493 /*--- The exported fns ..                    ---*/
1494 /*----------------------------------------------*/
1495 
1496 /* Layout of the real x87 state. */
1497 /* 13 June 05: Fpu_State and auxiliary constants were moved to
1498    guest_generic_x87.h */
1499 
1500 
1501 /* CLEAN HELPER */
1502 /* fpucw[15:0] contains a x87 native format FPU control word.
1503    Extract from it the required FPROUND value and any resulting
1504    emulation warning, and return (warn << 32) | fpround value.
1505 */
x86g_check_fldcw(UInt fpucw)1506 ULong x86g_check_fldcw ( UInt fpucw )
1507 {
1508    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1509    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1510    UInt rmode = (fpucw >> 10) & 3;
1511 
1512    /* Detect any required emulation warnings. */
1513    VexEmNote ew = EmNote_NONE;
1514 
1515    if ((fpucw & 0x3F) != 0x3F) {
1516       /* unmasked exceptions! */
1517       ew = EmWarn_X86_x87exns;
1518    }
1519    else
1520    if (((fpucw >> 8) & 3) != 3) {
1521       /* unsupported precision */
1522       ew = EmWarn_X86_x87precision;
1523    }
1524 
1525    return (((ULong)ew) << 32) | ((ULong)rmode);
1526 }
1527 
1528 /* CLEAN HELPER */
1529 /* Given fpround as an IRRoundingMode value, create a suitable x87
1530    native format FPU control word. */
x86g_create_fpucw(UInt fpround)1531 UInt x86g_create_fpucw ( UInt fpround )
1532 {
1533    fpround &= 3;
1534    return 0x037F | (fpround << 10);
1535 }
1536 
1537 
1538 /* CLEAN HELPER */
1539 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1540    Extract from it the required SSEROUND value and any resulting
1541    emulation warning, and return (warn << 32) | sseround value.
1542 */
x86g_check_ldmxcsr(UInt mxcsr)1543 ULong x86g_check_ldmxcsr ( UInt mxcsr )
1544 {
1545    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1546    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1547    UInt rmode = (mxcsr >> 13) & 3;
1548 
1549    /* Detect any required emulation warnings. */
1550    VexEmNote ew = EmNote_NONE;
1551 
1552    if ((mxcsr & 0x1F80) != 0x1F80) {
1553       /* unmasked exceptions! */
1554       ew = EmWarn_X86_sseExns;
1555    }
1556    else
1557    if (mxcsr & (1<<15)) {
1558       /* FZ is set */
1559       ew = EmWarn_X86_fz;
1560    }
1561    else
1562    if (mxcsr & (1<<6)) {
1563       /* DAZ is set */
1564       ew = EmWarn_X86_daz;
1565    }
1566 
1567    return (((ULong)ew) << 32) | ((ULong)rmode);
1568 }
1569 
1570 
1571 /* CLEAN HELPER */
1572 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1573    native format MXCSR value. */
x86g_create_mxcsr(UInt sseround)1574 UInt x86g_create_mxcsr ( UInt sseround )
1575 {
1576    sseround &= 3;
1577    return 0x1F80 | (sseround << 13);
1578 }
1579 
1580 
1581 /* CALLED FROM GENERATED CODE */
1582 /* DIRTY HELPER (writes guest state) */
1583 /* Initialise the x87 FPU state as per 'finit'. */
x86g_dirtyhelper_FINIT(VexGuestX86State * gst)1584 void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
1585 {
1586    Int i;
1587    gst->guest_FTOP = 0;
1588    for (i = 0; i < 8; i++) {
1589       gst->guest_FPTAG[i] = 0; /* empty */
1590       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1591    }
1592    gst->guest_FPROUND = (UInt)Irrm_NEAREST;
1593    gst->guest_FC3210  = 0;
1594 }
1595 
1596 
1597 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1598    appears to differ from the former only in that the 8 FP registers
1599    themselves are not transferred into the guest state. */
1600 static
do_put_x87(Bool moveRegs,UChar * x87_state,VexGuestX86State * vex_state)1601 VexEmNote do_put_x87 ( Bool moveRegs,
1602                        /*IN*/UChar* x87_state,
1603                        /*OUT*/VexGuestX86State* vex_state )
1604 {
1605    Int        stno, preg;
1606    UInt       tag;
1607    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1608    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1609    Fpu_State* x87     = (Fpu_State*)x87_state;
1610    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1611    UInt       tagw    = x87->env[FP_ENV_TAG];
1612    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1613    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1614    VexEmNote  ew;
1615    UInt       fpround;
1616    ULong      pair;
1617 
1618    /* Copy registers and tags */
1619    for (stno = 0; stno < 8; stno++) {
1620       preg = (stno + ftop) & 7;
1621       tag = (tagw >> (2*preg)) & 3;
1622       if (tag == 3) {
1623          /* register is empty */
1624          /* hmm, if it's empty, does it still get written?  Probably
1625             safer to say it does.  If we don't, memcheck could get out
1626             of sync, in that it thinks all FP registers are defined by
1627             this helper, but in reality some have not been updated. */
1628          if (moveRegs)
1629             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1630          vexTags[preg] = 0;
1631       } else {
1632          /* register is non-empty */
1633          if (moveRegs)
1634             convert_f80le_to_f64le( &x87->reg[10*stno],
1635                                     (UChar*)&vexRegs[preg] );
1636          vexTags[preg] = 1;
1637       }
1638    }
1639 
1640    /* stack pointer */
1641    vex_state->guest_FTOP = ftop;
1642 
1643    /* status word */
1644    vex_state->guest_FC3210 = c3210;
1645 
1646    /* handle the control word, setting FPROUND and detecting any
1647       emulation warnings. */
1648    pair    = x86g_check_fldcw ( (UInt)fpucw );
1649    fpround = (UInt)pair;
1650    ew      = (VexEmNote)(pair >> 32);
1651 
1652    vex_state->guest_FPROUND = fpround & 3;
1653 
1654    /* emulation warnings --> caller */
1655    return ew;
1656 }
1657 
1658 
1659 /* Create an x87 FPU state from the guest state, as close as
1660    we can approximate it. */
1661 static
do_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1662 void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1663                   /*OUT*/UChar* x87_state )
1664 {
1665    Int        i, stno, preg;
1666    UInt       tagw;
1667    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1668    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1669    Fpu_State* x87     = (Fpu_State*)x87_state;
1670    UInt       ftop    = vex_state->guest_FTOP;
1671    UInt       c3210   = vex_state->guest_FC3210;
1672 
1673    for (i = 0; i < 14; i++)
1674       x87->env[i] = 0;
1675 
1676    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1677    x87->env[FP_ENV_STAT]
1678       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1679    x87->env[FP_ENV_CTRL]
1680       = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
1681 
1682    /* Dump the register stack in ST order. */
1683    tagw = 0;
1684    for (stno = 0; stno < 8; stno++) {
1685       preg = (stno + ftop) & 7;
1686       if (vexTags[preg] == 0) {
1687          /* register is empty */
1688          tagw |= (3 << (2*preg));
1689          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1690                                  &x87->reg[10*stno] );
1691       } else {
1692          /* register is full. */
1693          tagw |= (0 << (2*preg));
1694          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1695                                  &x87->reg[10*stno] );
1696       }
1697    }
1698    x87->env[FP_ENV_TAG] = toUShort(tagw);
1699 }
1700 
1701 
1702 /* CALLED FROM GENERATED CODE */
1703 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FXSAVE(VexGuestX86State * gst,HWord addr)1704 void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
1705 {
1706    /* Somewhat roundabout, but at least it's simple. */
1707    Fpu_State tmp;
1708    UShort*   addrS = (UShort*)addr;
1709    UChar*    addrC = (UChar*)addr;
1710    U128*     xmm   = (U128*)(addr + 160);
1711    UInt      mxcsr;
1712    UShort    fp_tags;
1713    UInt      summary_tags;
1714    Int       r, stno;
1715    UShort    *srcS, *dstS;
1716 
1717    do_get_x87( gst, (UChar*)&tmp );
1718    mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
1719 
1720    /* Now build the proper fxsave image from the x87 image we just
1721       made. */
1722 
1723    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1724    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */
1725 
1726    /* set addrS[2] in an endian-independent way */
1727    summary_tags = 0;
1728    fp_tags = tmp.env[FP_ENV_TAG];
1729    for (r = 0; r < 8; r++) {
1730       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1731          summary_tags |= (1 << r);
1732    }
1733    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1734    addrC[5]  = 0; /* pad */
1735 
1736    addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
1737    addrS[4]  = 0;
1738    addrS[5]  = 0; /* FPU IP (bogus) */
1739    addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
1740                      could conceivably dump %CS here) */
1741 
1742    addrS[7]  = 0; /* Intel reserved */
1743 
1744    addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
1745    addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
1746    addrS[10] = 0; /* segment selector for above operand pointer; %DS
1747                      perhaps? */
1748    addrS[11] = 0; /* Intel reserved */
1749 
1750    addrS[12] = toUShort(mxcsr);  /* MXCSR */
1751    addrS[13] = toUShort(mxcsr >> 16);
1752 
1753    addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
1754    addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
1755 
1756    /* Copy in the FP registers, in ST order. */
1757    for (stno = 0; stno < 8; stno++) {
1758       srcS = (UShort*)(&tmp.reg[10*stno]);
1759       dstS = (UShort*)(&addrS[16 + 8*stno]);
1760       dstS[0] = srcS[0];
1761       dstS[1] = srcS[1];
1762       dstS[2] = srcS[2];
1763       dstS[3] = srcS[3];
1764       dstS[4] = srcS[4];
1765       dstS[5] = 0;
1766       dstS[6] = 0;
1767       dstS[7] = 0;
1768    }
1769 
1770    /* That's the first 160 bytes of the image done.  Now only %xmm0
1771       .. %xmm7 remain to be copied.  If the host is big-endian, these
1772       need to be byte-swapped. */
1773    vassert(host_is_little_endian());
1774 
1775 #  define COPY_U128(_dst,_src)                       \
1776       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1777            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1778       while (0)
1779 
1780    COPY_U128( xmm[0], gst->guest_XMM0 );
1781    COPY_U128( xmm[1], gst->guest_XMM1 );
1782    COPY_U128( xmm[2], gst->guest_XMM2 );
1783    COPY_U128( xmm[3], gst->guest_XMM3 );
1784    COPY_U128( xmm[4], gst->guest_XMM4 );
1785    COPY_U128( xmm[5], gst->guest_XMM5 );
1786    COPY_U128( xmm[6], gst->guest_XMM6 );
1787    COPY_U128( xmm[7], gst->guest_XMM7 );
1788 
1789 #  undef COPY_U128
1790 }
1791 
1792 
1793 /* CALLED FROM GENERATED CODE */
1794 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FXRSTOR(VexGuestX86State * gst,HWord addr)1795 VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
1796 {
1797    Fpu_State tmp;
1798    VexEmNote warnX87 = EmNote_NONE;
1799    VexEmNote warnXMM = EmNote_NONE;
1800    UShort*   addrS   = (UShort*)addr;
1801    UChar*    addrC   = (UChar*)addr;
1802    U128*     xmm     = (U128*)(addr + 160);
1803    UShort    fp_tags;
1804    Int       r, stno, i;
1805 
1806    /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
1807       to be byte-swapped. */
1808    vassert(host_is_little_endian());
1809 
1810 #  define COPY_U128(_dst,_src)                       \
1811       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1812            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1813       while (0)
1814 
1815    COPY_U128( gst->guest_XMM0, xmm[0] );
1816    COPY_U128( gst->guest_XMM1, xmm[1] );
1817    COPY_U128( gst->guest_XMM2, xmm[2] );
1818    COPY_U128( gst->guest_XMM3, xmm[3] );
1819    COPY_U128( gst->guest_XMM4, xmm[4] );
1820    COPY_U128( gst->guest_XMM5, xmm[5] );
1821    COPY_U128( gst->guest_XMM6, xmm[6] );
1822    COPY_U128( gst->guest_XMM7, xmm[7] );
1823 
1824 #  undef COPY_U128
1825 
1826    /* Copy the x87 registers out of the image, into a temporary
1827       Fpu_State struct. */
1828 
1829    /* LLVM on Darwin turns the following loop into a movaps plus a
1830       handful of scalar stores.  This would work fine except for the
1831       fact that VEX doesn't keep the stack correctly (16-) aligned for
1832       the call, so it segfaults.  Hence, split the loop into two
1833       pieces (and pray LLVM doesn't merely glue them back together) so
1834       it's composed only of scalar stores and so is alignment
1835       insensitive.  Of course this is a kludge of the lamest kind --
1836       VEX should be fixed properly. */
1837    /* Code that seems to trigger the problem:
1838       for (i = 0; i < 14; i++) tmp.env[i] = 0; */
1839    for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
1840    __asm__ __volatile__("" ::: "memory");
1841    for (i = 0; i < 7; i++) tmp.env[i+7] = 0;
1842 
1843    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1844    /* fill in tmp.reg[0..7] */
1845    for (stno = 0; stno < 8; stno++) {
1846       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1847       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1848       dstS[0] = srcS[0];
1849       dstS[1] = srcS[1];
1850       dstS[2] = srcS[2];
1851       dstS[3] = srcS[3];
1852       dstS[4] = srcS[4];
1853    }
1854    /* fill in tmp.env[0..13] */
1855    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1856    tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */
1857 
1858    fp_tags = 0;
1859    for (r = 0; r < 8; r++) {
1860       if (addrC[4] & (1<<r))
1861          fp_tags |= (0 << (2*r)); /* EMPTY */
1862       else
1863          fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */
1864    }
1865    tmp.env[FP_ENV_TAG] = fp_tags;
1866 
1867    /* Now write 'tmp' into the guest state. */
1868    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1869 
1870    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1871                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1872      ULong w64 = x86g_check_ldmxcsr( w32 );
1873 
1874      warnXMM = (VexEmNote)(w64 >> 32);
1875 
1876      gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
1877    }
1878 
1879    /* Prefer an X87 emwarn over an XMM one, if both exist. */
1880    if (warnX87 != EmNote_NONE)
1881       return warnX87;
1882    else
1883       return warnXMM;
1884 }
1885 
1886 
1887 /* CALLED FROM GENERATED CODE */
1888 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSAVE(VexGuestX86State * gst,HWord addr)1889 void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
1890 {
1891    do_get_x87( gst, (UChar*)addr );
1892 }
1893 
1894 /* CALLED FROM GENERATED CODE */
1895 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FRSTOR(VexGuestX86State * gst,HWord addr)1896 VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
1897 {
1898    return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
1899 }
1900 
1901 /* CALLED FROM GENERATED CODE */
1902 /* DIRTY HELPER (reads guest state, writes guest mem) */
x86g_dirtyhelper_FSTENV(VexGuestX86State * gst,HWord addr)1903 void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
1904 {
1905    /* Somewhat roundabout, but at least it's simple. */
1906    Int       i;
1907    UShort*   addrP = (UShort*)addr;
1908    Fpu_State tmp;
1909    do_get_x87( gst, (UChar*)&tmp );
1910    for (i = 0; i < 14; i++)
1911       addrP[i] = tmp.env[i];
1912 }
1913 
1914 /* CALLED FROM GENERATED CODE */
1915 /* DIRTY HELPER (writes guest state, reads guest mem) */
x86g_dirtyhelper_FLDENV(VexGuestX86State * gst,HWord addr)1916 VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
1917 {
1918    return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
1919 }
1920 
1921 /* VISIBLE TO LIBVEX CLIENT */
1922 /* Do x87 save from the supplied VexGuestX86State structure and store the
1923    result at the given address which represents a buffer of at least 108
1924    bytes. */
LibVEX_GuestX86_get_x87(VexGuestX86State * vex_state,UChar * x87_state)1925 void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
1926                                /*OUT*/UChar* x87_state )
1927 {
1928    do_get_x87 ( vex_state, x87_state );
1929 }
1930 
1931 /* VISIBLE TO LIBVEX CLIENT */
1932 /* Do x87 restore from the supplied address and store read values to the given
1933    VexGuestX86State structure. */
LibVEX_GuestX86_put_x87(UChar * x87_state,VexGuestX86State * vex_state)1934 VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
1935                                     /*MOD*/VexGuestX86State* vex_state )
1936 {
1937    return do_put_x87 ( True/*moveRegs*/, x87_state, vex_state );
1938 }
1939 
1940 /* VISIBLE TO LIBVEX CLIENT */
1941 /* Return mxcsr from the supplied VexGuestX86State structure. */
LibVEX_GuestX86_get_mxcsr(VexGuestX86State * vex_state)1942 UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
1943 {
1944    return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
1945 }
1946 
1947 /* VISIBLE TO LIBVEX CLIENT */
1948 /* Modify the given VexGuestX86State structure according to the passed mxcsr
1949    value. */
LibVEX_GuestX86_put_mxcsr(UInt mxcsr,VexGuestX86State * vex_state)1950 VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
1951                                       /*MOD*/VexGuestX86State* vex_state)
1952 {
1953    ULong w64 = x86g_check_ldmxcsr( mxcsr );
1954    vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
1955    return (VexEmNote)(w64 >> 32);
1956 }
1957 
1958 /*---------------------------------------------------------------*/
1959 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
1960 /*---------------------------------------------------------------*/
1961 
1962 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1963 /* Calculate both flags and value result for rotate right
1964    through the carry bit.  Result in low 32 bits,
1965    new flags (OSZACP) in high 32 bits.
1966 */
x86g_calculate_RCR(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)1967 ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
1968 {
1969    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
1970 
1971    switch (sz) {
1972       case 4:
1973          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1974          of        = ((arg >> 31) ^ cf) & 1;
1975          while (tempCOUNT > 0) {
1976             tempcf = arg & 1;
1977             arg    = (arg >> 1) | (cf << 31);
1978             cf     = tempcf;
1979             tempCOUNT--;
1980          }
1981          break;
1982       case 2:
1983          while (tempCOUNT >= 17) tempCOUNT -= 17;
1984          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1985          of        = ((arg >> 15) ^ cf) & 1;
1986          while (tempCOUNT > 0) {
1987             tempcf = arg & 1;
1988             arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
1989             cf     = tempcf;
1990             tempCOUNT--;
1991          }
1992          break;
1993       case 1:
1994          while (tempCOUNT >= 9) tempCOUNT -= 9;
1995          cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
1996          of        = ((arg >> 7) ^ cf) & 1;
1997          while (tempCOUNT > 0) {
1998             tempcf = arg & 1;
1999             arg    = ((arg >> 1) & 0x7F) | (cf << 7);
2000             cf     = tempcf;
2001             tempCOUNT--;
2002          }
2003          break;
2004       default:
2005          vpanic("calculate_RCR: invalid size");
2006    }
2007 
2008    cf &= 1;
2009    of &= 1;
2010    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2011    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2012 
2013    return (((ULong)eflags_in) << 32) | ((ULong)arg);
2014 }
2015 
2016 
2017 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2018 /* Calculate both flags and value result for rotate left
2019    through the carry bit.  Result in low 32 bits,
2020    new flags (OSZACP) in high 32 bits.
2021 */
x86g_calculate_RCL(UInt arg,UInt rot_amt,UInt eflags_in,UInt sz)2022 ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
2023 {
2024    UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
2025 
2026    switch (sz) {
2027       case 4:
2028          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2029          while (tempCOUNT > 0) {
2030             tempcf = (arg >> 31) & 1;
2031             arg    = (arg << 1) | (cf & 1);
2032             cf     = tempcf;
2033             tempCOUNT--;
2034          }
2035          of = ((arg >> 31) ^ cf) & 1;
2036          break;
2037       case 2:
2038          while (tempCOUNT >= 17) tempCOUNT -= 17;
2039          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2040          while (tempCOUNT > 0) {
2041             tempcf = (arg >> 15) & 1;
2042             arg    = 0xFFFF & ((arg << 1) | (cf & 1));
2043             cf     = tempcf;
2044             tempCOUNT--;
2045          }
2046          of = ((arg >> 15) ^ cf) & 1;
2047          break;
2048       case 1:
2049          while (tempCOUNT >= 9) tempCOUNT -= 9;
2050          cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
2051          while (tempCOUNT > 0) {
2052             tempcf = (arg >> 7) & 1;
2053             arg    = 0xFF & ((arg << 1) | (cf & 1));
2054             cf     = tempcf;
2055             tempCOUNT--;
2056          }
2057          of = ((arg >> 7) ^ cf) & 1;
2058          break;
2059       default:
2060          vpanic("calculate_RCL: invalid size");
2061    }
2062 
2063    cf &= 1;
2064    of &= 1;
2065    eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
2066    eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
2067 
2068    return (((ULong)eflags_in) << 32) | ((ULong)arg);
2069 }
2070 
2071 
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* x86g_calculate_daa_das_aaa_aas (defined below, after its parity
   helper) calculates both flags and value result for
   DAA/DAS/AAA/AAS.  AX value in low half of arg, OSZACP in upper
   half.  See guest-x86/toIR.c usage point for details.
*/
/* Return 1 if the low 8 bits of w32 contain an even number of set
   bits, and 0 if the number is odd.  Bits 8 and above are ignored. */
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt ones = 0;
   UInt bit;
   for (bit = 0; bit < 8; bit++)
      ones += (w32 >> bit) & 1;
   return (ones & 1) ^ 1;
}
/* Compute the new AX value and flags for DAA (opcode 0x27), DAS
   (0x2F), AAA (0x37) or AAS (0x3F); any other opcode asserts.
   flags_and_AX carries AL in bits [7:0], AH in bits [15:8] and the
   flags in the upper half, each at (16 + its X86G_CC_SHIFT_*).  The
   result uses the same layout.  Only the A and C flags are read; all
   six of O, S, Z, A, C and P are produced. */
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt al = (flags_and_AX >> 0) & 0xFF;
   UInt ah = (flags_and_AX >> 8) & 0xFF;
   UInt fA = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt fC = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   /* Output-only flags: every opcode handled below sets them. */
   UInt fO = 0, fS = 0, fZ = 0, fP = 0;
   /* All four instructions adjust the low nibble under the same
      condition, evaluated on the incoming AL and A flag. */
   Bool fixLo = (al & 0xF) > 9 || fA == 1;

   switch (opcode) {
      case 0x27: { /* DAA */
         /* The high-nibble test uses the incoming AL and C. */
         Bool fixHi = al > 0x99 || fC == 1;
         if (fixLo)
            al = al + 6;
         if (fixHi)
            al = al + 0x60;
         fA = fixLo ? 1 : 0;
         /* The carry depends on the high-nibble test alone. */
         fC = fixHi ? 1 : 0;
         /* O is undefined.  S Z and P are set according to the
            result. */
         al &= 0xFF;
         fO = 0; /* let's say */
         fS = (al & 0x80) ? 1 : 0;
         fZ = (al == 0) ? 1 : 0;
         fP = calc_parity_8bit( al );
         break;
      }
      case 0x2F: { /* DAS */
         /* The high-nibble test uses the incoming AL and C. */
         Bool fixHi = al > 0x99 || fC == 1;
         UInt carry = 0;
         if (fixLo) {
            /* A borrow out of AL forces the carry; otherwise the
               incoming carry is kept. */
            carry = (al < 6) ? 1 : fC;
            al    = al - 6;
         }
         if (fixHi) {
            al    = al - 0x60;
            carry = 1;
         }
         /* When fixHi is false the carry is deliberately left as
            computed above.  Intel docs are wrong in saying it is
            cleared. */
         fA = fixLo ? 1 : 0;
         fC = carry;
         /* O is undefined.  S Z and P are set according to the
            result. */
         al &= 0xFF;
         fO = 0; /* let's say */
         fS = (al & 0x80) ? 1 : 0;
         fZ = (al == 0) ? 1 : 0;
         fP = calc_parity_8bit( al );
         break;
      }
      case 0x37: { /* AAA */
         if (fixLo) {
            /* AH takes an extra increment when AL + 6 carries out of
               8 bits, so test AL before adjusting it. */
            ah = ah + 1 + ((al > 0xF9) ? 1 : 0);
            al = al + 6;
         }
         fA = fixLo ? 1 : 0;
         fC = fA;
         al = al & 0xF;
         /* O S Z and P are undefined. */
         fO = fS = fZ = fP = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         if (fixLo) {
            /* AH takes an extra decrement when AL - 6 borrows, so
               test AL before adjusting it. */
            ah = ah - 1 - ((al < 0x06) ? 1 : 0);
            al = al - 6;
         }
         fA = fixLo ? 1 : 0;
         fC = fA;
         al = al & 0xF;
         /* O S Z and P are undefined. */
         fO = fS = fZ = fP = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }

   return   ( (fO & 1) << (16 + X86G_CC_SHIFT_O) )
          | ( (fS & 1) << (16 + X86G_CC_SHIFT_S) )
          | ( (fZ & 1) << (16 + X86G_CC_SHIFT_Z) )
          | ( (fA & 1) << (16 + X86G_CC_SHIFT_A) )
          | ( (fC & 1) << (16 + X86G_CC_SHIFT_C) )
          | ( (fP & 1) << (16 + X86G_CC_SHIFT_P) )
          | ( (ah & 0xFF) << 8 )
          | ( (al & 0xFF) << 0 );
}
2199 
/* Compute the new AX value and flags for AAM (opcode 0xD4) or AAD
   (opcode 0xD5), both with a fixed base of 10; any other opcode
   asserts.  flags_and_AX carries AL in bits [7:0] and AH in bits
   [15:8]; the incoming flags in its upper half are not used.  The
   result holds the new AL and AH in the same positions, with the
   flags in the upper half, each at (16 + its X86G_CC_SHIFT_*):
   S, Z and P reflect the new AL, and O, C and A (undefined) are
   returned as zero. */
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt al = (flags_and_AX >> 0) & 0xFF;
   UInt ah = (flags_and_AX >> 8) & 0xFF;
   UInt fS, fZ, fP;

   if (opcode == 0xD4) {
      /* AAM: split AL into tens (AH) and units (AL). */
      ah = al / 10;
      al = al % 10;
   }
   else if (opcode == 0xD5) {
      /* AAD: fold AH:AL back into a single byte in AL. */
      al = ((ah * 10) + al) & 0xff;
      ah = 0;
   }
   else {
      vassert(0);
   }

   fS = (al & 0x80) ? 1 : 0;
   fZ = (al == 0) ? 1 : 0;
   fP = calc_parity_8bit( al );

   /* O, C and A are zero, so they contribute nothing here. */
   return   ( (fS & 1) << (16 + X86G_CC_SHIFT_S) )
          | ( (fZ & 1) << (16 + X86G_CC_SHIFT_Z) )
          | ( (fP & 1) << (16 + X86G_CC_SHIFT_P) )
          | ( (ah & 0xFF) << 8 )
          | ( (al & 0xFF) << 0 );
}
2244 
2245 
2246 /* CALLED FROM GENERATED CODE */
2247 /* DIRTY HELPER (non-referentially-transparent) */
2248 /* Horrible hack.  On non-x86 platforms, return 1. */
x86g_dirtyhelper_RDTSC(void)2249 ULong x86g_dirtyhelper_RDTSC ( void )
2250 {
2251 #  if defined(__i386__)
2252    ULong res;
2253    __asm__ __volatile__("rdtsc" : "=A" (res));
2254    return res;
2255 #  else
2256    return 1ULL;
2257 #  endif
2258 }
2259 
2260 
2261 /* CALLED FROM GENERATED CODE */
2262 /* DIRTY HELPER (modifies guest state) */
2263 /* Claim to be a P55C (Intel Pentium/MMX) */
x86g_dirtyhelper_CPUID_sse0(VexGuestX86State * st)2264 void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
2265 {
2266    switch (st->guest_EAX) {
2267       case 0:
2268          st->guest_EAX = 0x1;
2269          st->guest_EBX = 0x756e6547;
2270          st->guest_ECX = 0x6c65746e;
2271          st->guest_EDX = 0x49656e69;
2272          break;
2273       default:
2274          st->guest_EAX = 0x543;
2275          st->guest_EBX = 0x0;
2276          st->guest_ECX = 0x0;
2277          st->guest_EDX = 0x8001bf;
2278          break;
2279    }
2280 }
2281 
2282 /* CALLED FROM GENERATED CODE */
2283 /* DIRTY HELPER (modifies guest state) */
2284 /* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
2285 /* But without 3DNow support (weird, but we really don't support it). */
x86g_dirtyhelper_CPUID_mmxext(VexGuestX86State * st)2286 void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
2287 {
2288    switch (st->guest_EAX) {
2289       /* vendor ID */
2290       case 0:
2291          st->guest_EAX = 0x1;
2292          st->guest_EBX = 0x68747541;
2293          st->guest_ECX = 0x444d4163;
2294          st->guest_EDX = 0x69746e65;
2295          break;
2296       /* feature bits */
2297       case 1:
2298          st->guest_EAX = 0x621;
2299          st->guest_EBX = 0x0;
2300          st->guest_ECX = 0x0;
2301          st->guest_EDX = 0x183f9ff;
2302          break;
2303       /* Highest Extended Function Supported (0x80000004 brand string) */
2304       case 0x80000000:
2305          st->guest_EAX = 0x80000004;
2306          st->guest_EBX = 0x68747541;
2307          st->guest_ECX = 0x444d4163;
2308          st->guest_EDX = 0x69746e65;
2309          break;
2310       /* Extended Processor Info and Feature Bits */
2311       case 0x80000001:
2312          st->guest_EAX = 0x721;
2313          st->guest_EBX = 0x0;
2314          st->guest_ECX = 0x0;
2315          st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
2316          break;
2317       /* Processor Brand String "AMD Athlon(tm) Processor" */
2318       case 0x80000002:
2319          st->guest_EAX = 0x20444d41;
2320          st->guest_EBX = 0x6c687441;
2321          st->guest_ECX = 0x74286e6f;
2322          st->guest_EDX = 0x5020296d;
2323          break;
2324       case 0x80000003:
2325          st->guest_EAX = 0x65636f72;
2326          st->guest_EBX = 0x726f7373;
2327          st->guest_ECX = 0x0;
2328          st->guest_EDX = 0x0;
2329          break;
2330       default:
2331          st->guest_EAX = 0x0;
2332          st->guest_EBX = 0x0;
2333          st->guest_ECX = 0x0;
2334          st->guest_EDX = 0x0;
2335          break;
2336    }
2337 }
2338 
2339 /* CALLED FROM GENERATED CODE */
2340 /* DIRTY HELPER (modifies guest state) */
2341 /* Claim to be the following SSE1-capable CPU:
2342    vendor_id       : GenuineIntel
2343    cpu family      : 6
2344    model           : 11
2345    model name      : Intel(R) Pentium(R) III CPU family      1133MHz
2346    stepping        : 1
2347    cpu MHz         : 1131.013
2348    cache size      : 512 KB
2349 */
x86g_dirtyhelper_CPUID_sse1(VexGuestX86State * st)2350 void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
2351 {
2352    switch (st->guest_EAX) {
2353       case 0:
2354          st->guest_EAX = 0x00000002;
2355          st->guest_EBX = 0x756e6547;
2356          st->guest_ECX = 0x6c65746e;
2357          st->guest_EDX = 0x49656e69;
2358          break;
2359       case 1:
2360          st->guest_EAX = 0x000006b1;
2361          st->guest_EBX = 0x00000004;
2362          st->guest_ECX = 0x00000000;
2363          st->guest_EDX = 0x0383fbff;
2364          break;
2365       default:
2366          st->guest_EAX = 0x03020101;
2367          st->guest_EBX = 0x00000000;
2368          st->guest_ECX = 0x00000000;
2369          st->guest_EDX = 0x0c040883;
2370          break;
2371    }
2372 }
2373 
2374 /* Claim to be the following SSE2-capable CPU:
2375    vendor_id    : GenuineIntel
2376    cpu family   : 15
2377    model        : 2
2378    model name   : Intel(R) Pentium(R) 4 CPU 3.00GHz
2379    stepping     : 9
2380    microcode    : 0x17
2381    cpu MHz      : 2992.577
2382    cache size   : 512 KB
2383    flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
2384                   pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
2385                    pebs bts cid xtpr
2386    clflush size : 64
2387    cache_alignment : 128
2388    address sizes : 36 bits physical, 32 bits virtual
2389 */
x86g_dirtyhelper_CPUID_sse2(VexGuestX86State * st)2390 void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
2391 {
2392    switch (st->guest_EAX) {
2393       case 0:
2394          st->guest_EAX = 0x00000002;
2395          st->guest_EBX = 0x756e6547;
2396          st->guest_ECX = 0x6c65746e;
2397          st->guest_EDX = 0x49656e69;
2398          break;
2399       case 1:
2400          st->guest_EAX = 0x00000f29;
2401          st->guest_EBX = 0x01020809;
2402          st->guest_ECX = 0x00004400;
2403          st->guest_EDX = 0xbfebfbff;
2404          break;
2405       default:
2406          st->guest_EAX = 0x03020101;
2407          st->guest_EBX = 0x00000000;
2408          st->guest_ECX = 0x00000000;
2409          st->guest_EDX = 0x0c040883;
2410          break;
2411    }
2412 }
2413 
2414 /* Claim to be the following SSSE3-capable CPU (2 x ...):
2415    vendor_id       : GenuineIntel
2416    cpu family      : 6
2417    model           : 15
2418    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2419    stepping        : 6
2420    cpu MHz         : 2394.000
2421    cache size      : 4096 KB
2422    physical id     : 0
2423    siblings        : 2
2424    core id         : 0
2425    cpu cores       : 2
2426    fpu             : yes
2427    fpu_exception   : yes
2428    cpuid level     : 10
2429    wp              : yes
2430    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2431                      mtrr pge mca cmov pat pse36 clflush dts acpi
2432                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2433                      constant_tsc pni monitor ds_cpl vmx est tm2
2434                      cx16 xtpr lahf_lm
2435    bogomips        : 4798.78
2436    clflush size    : 64
2437    cache_alignment : 64
2438    address sizes   : 36 bits physical, 48 bits virtual
2439    power management:
2440 */
/* Dirty helper emulating CPUID for the Core 2 CPU profile.  The leaf
   is taken from guest EAX; for leaf 4 the sub-leaf is taken from
   guest ECX.  Guest EAX/EBX/ECX/EDX are overwritten with the canned
   answer.  Leaf 0xa and every leaf without an entry below share the
   same answer; an unlisted sub-leaf of leaf 4 yields all zeroes. */
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
   typedef
      struct { UInt key; UInt eax; UInt ebx; UInt ecx; UInt edx; }
      CpuidRow;

   /* Answers selected purely by the leaf number in EAX. */
   static const CpuidRow by_leaf[] = {
      { 0x00000000, 0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69 },
      { 0x00000001, 0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff },
      { 0x00000002, 0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049 },
      { 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
      { 0x00000005, 0x00000040, 0x00000040, 0x00000003, 0x00000020 },
      { 0x00000006, 0x00000001, 0x00000002, 0x00000001, 0x00000000 },
      { 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
      { 0x00000008, 0x00000400, 0x00000000, 0x00000000, 0x00000000 },
      { 0x00000009, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
      { 0x80000000, 0x80000008, 0x00000000, 0x00000000, 0x00000000 },
      { 0x80000001, 0x00000000, 0x00000000, 0x00000001, 0x20100000 },
      { 0x80000002, 0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865 },
      { 0x80000003, 0x43203229, 0x20205550, 0x20202020, 0x20202020 },
      { 0x80000004, 0x30303636, 0x20402020, 0x30342e32, 0x007a4847 },
      { 0x80000005, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
      { 0x80000006, 0x00000000, 0x00000000, 0x10008040, 0x00000000 },
      { 0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
      { 0x80000008, 0x00003024, 0x00000000, 0x00000000, 0x00000000 }
   };

   /* Answers for leaf 4, selected by the sub-leaf number in ECX. */
   static const CpuidRow leaf4_by_subleaf[] = {
      { 0x00000000, 0x04000121, 0x01c0003f, 0x0000003f, 0x00000001 },
      { 0x00000001, 0x04000122, 0x01c0003f, 0x0000003f, 0x00000001 },
      { 0x00000002, 0x04004143, 0x03c0003f, 0x00000fff, 0x00000001 }
   };

   /* Leaf 0xa, which is also the answer for any unlisted leaf. */
   static const CpuidRow unhandled_leaf
      = { 0x0000000a, 0x07280202, 0x00000000, 0x00000000, 0x00000000 };

   /* Answer for an unlisted sub-leaf of leaf 4. */
   static const CpuidRow all_zeroes
      = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 };

   const CpuidRow* table;
   const CpuidRow* answer;
   UInt            n_rows, wanted, i;

   if (st->guest_EAX == 0x00000004) {
      table  = leaf4_by_subleaf;
      n_rows = sizeof(leaf4_by_subleaf) / sizeof(leaf4_by_subleaf[0]);
      wanted = st->guest_ECX;
      answer = &all_zeroes;
   } else {
      table  = by_leaf;
      n_rows = sizeof(by_leaf) / sizeof(by_leaf[0]);
      wanted = st->guest_EAX;
      answer = &unhandled_leaf;
   }

   for (i = 0; i < n_rows; i++) {
      if (table[i].key == wanted) {
         answer = &table[i];
         break;
      }
   }

   st->guest_EAX = answer->eax;
   st->guest_EBX = answer->ebx;
   st->guest_ECX = answer->ecx;
   st->guest_EDX = answer->edx;
}
2527 
2528 
2529 /* CALLED FROM GENERATED CODE */
2530 /* DIRTY HELPER (non-referentially-transparent) */
2531 /* Horrible hack.  On non-x86 platforms, return 0. */
/* Dirty helper for the IN instruction.  When built for an x86 host
   (__i386__) it executes a real port read of 'sz' bytes (1, 2 or 4)
   from port 'portno' (only the low 16 bits are used) and returns the
   value read; any other 'sz' returns 0.  On every other host it
   simply returns 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt value = 0;
   portno &= 0xFFFF;
   if (sz == 4) {
      __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                           : "=a" (value) : "Nd" (portno));
   }
   else if (sz == 2) {
      __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                           : "=a" (value) : "Nd" (portno));
   }
   else if (sz == 1) {
      __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                           : "=a" (value) : "Nd" (portno));
   }
   /* Unsupported sizes fall through with value == 0. */
   return value;
#  else
   return 0;
#  endif
}
2558 
2559 
2560 /* CALLED FROM GENERATED CODE */
2561 /* DIRTY HELPER (non-referentially-transparent) */
2562 /* Horrible hack.  On non-x86 platforms, do nothing. */
/* Dirty helper for the OUT instruction.  When built for an x86 host
   (__i386__) it executes a real port write of the low 'sz' bytes
   (1, 2 or 4) of 'data' to port 'portno' (only the low 16 bits are
   used); any other 'sz' is silently ignored.  On every other host
   the call has no effect. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   if (sz == 4) {
      __asm__ __volatile__("outl %0, %w1"
                           : : "a" (data), "Nd" (portno));
   }
   else if (sz == 2) {
      __asm__ __volatile__("outw %w0, %w1"
                           : : "a" (data), "Nd" (portno));
   }
   else if (sz == 1) {
      __asm__ __volatile__("outb %b0, %w1"
                           : : "a" (data), "Nd" (portno));
   }
#  else
   /* Not an x86 host: nothing to write to. */
#  endif
}
2587 
2588 /* CALLED FROM GENERATED CODE */
2589 /* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, just zero the 6 bytes at 'address'. */
2591 /* op = 0: call the native SGDT instruction.
2592    op = 1: call the native SIDT instruction.
2593 */
/* Dirty helper for SGDT/SIDT.  When built for an x86 host (__i386__)
   it runs the native instruction, storing its result at 'address':
   op == 0 selects SGDT, op == 1 selects SIDT, and any other value
   panics.  On every other host 'op' is ignored and the first 6 bytes
   at 'address' are set to zero. */
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   if (op == 0) {
      __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
   }
   else if (op == 1) {
      __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
   }
   else {
      vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* No native instruction available: hand back an all-zero result. */
   UChar* dst = (UChar*)address;
   UInt   i;
   for (i = 0; i < 6; i++)
      dst[i] = 0;
#  endif
}
2612 
2613 /*---------------------------------------------------------------*/
2614 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
2615 /*---------------------------------------------------------------*/
2616 
/* Absolute difference |xx - yy| of two unsigned bytes. */
static inline UChar abdU8 ( UChar xx, UChar yy ) {
   Int diff = (Int)xx - (Int)yy;
   if (diff < 0)
      diff = -diff;
   return toUChar(diff);
}
2620 
mk32x2(UInt w1,UInt w0)2621 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2622    return (((ULong)w1) << 32) | ((ULong)w0);
2623 }
2624 
sel16x4_3(ULong w64)2625 static inline UShort sel16x4_3 ( ULong w64 ) {
2626    UInt hi32 = toUInt(w64 >> 32);
2627    return toUShort(hi32 >> 16);
2628 }
sel16x4_2(ULong w64)2629 static inline UShort sel16x4_2 ( ULong w64 ) {
2630    UInt hi32 = toUInt(w64 >> 32);
2631    return toUShort(hi32);
2632 }
sel16x4_1(ULong w64)2633 static inline UShort sel16x4_1 ( ULong w64 ) {
2634    UInt lo32 = toUInt(w64);
2635    return toUShort(lo32 >> 16);
2636 }
sel16x4_0(ULong w64)2637 static inline UShort sel16x4_0 ( ULong w64 ) {
2638    UInt lo32 = toUInt(w64);
2639    return toUShort(lo32);
2640 }
2641 
sel8x8_7(ULong w64)2642 static inline UChar sel8x8_7 ( ULong w64 ) {
2643    UInt hi32 = toUInt(w64 >> 32);
2644    return toUChar(hi32 >> 24);
2645 }
sel8x8_6(ULong w64)2646 static inline UChar sel8x8_6 ( ULong w64 ) {
2647    UInt hi32 = toUInt(w64 >> 32);
2648    return toUChar(hi32 >> 16);
2649 }
sel8x8_5(ULong w64)2650 static inline UChar sel8x8_5 ( ULong w64 ) {
2651    UInt hi32 = toUInt(w64 >> 32);
2652    return toUChar(hi32 >> 8);
2653 }
sel8x8_4(ULong w64)2654 static inline UChar sel8x8_4 ( ULong w64 ) {
2655    UInt hi32 = toUInt(w64 >> 32);
2656    return toUChar(hi32 >> 0);
2657 }
sel8x8_3(ULong w64)2658 static inline UChar sel8x8_3 ( ULong w64 ) {
2659    UInt lo32 = toUInt(w64);
2660    return toUChar(lo32 >> 24);
2661 }
sel8x8_2(ULong w64)2662 static inline UChar sel8x8_2 ( ULong w64 ) {
2663    UInt lo32 = toUInt(w64);
2664    return toUChar(lo32 >> 16);
2665 }
sel8x8_1(ULong w64)2666 static inline UChar sel8x8_1 ( ULong w64 ) {
2667    UInt lo32 = toUInt(w64);
2668    return toUChar(lo32 >> 8);
2669 }
sel8x8_0(ULong w64)2670 static inline UChar sel8x8_0 ( ULong w64 ) {
2671    UInt lo32 = toUInt(w64);
2672    return toUChar(lo32 >> 0);
2673 }
2674 
2675 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Clean helper for MMX PMADDWD.  Treats xx and yy as four signed
   16-bit lanes each, multiplies corresponding lanes, and adds the
   products pairwise: lanes 3 and 2 form the upper 32 bits of the
   result, lanes 1 and 0 the lower 32 bits. */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   /* Each product has magnitude at most 2^30, so it always fits in
      a signed 32-bit Int. */
   Int p3 = ((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy));
   Int p2 = ((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy));
   Int p1 = ((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy));
   Int p0 = ((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy));

   /* The sum of two products can reach 2^31 (all four words of a
      pair equal to 0x8000), which would overflow a signed Int.  Add
      as unsigned so that case wraps to 0x80000000 instead of being
      undefined behaviour. */
   UInt hi = (UInt)p3 + (UInt)p2;
   UInt lo = (UInt)p1 + (UInt)p0;

   return mk32x2(hi, lo);
}
2686 
2687 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Clean helper for MMX PSADBW.  Sums the absolute differences of the
   eight corresponding unsigned byte lanes of xx and yy, and returns
   the sum, masked to 16 bits, zero-extended to 64 bits. */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt sum = 0;
   UInt lane;
   for (lane = 0; lane < 8; lane++) {
      UInt  shift = 8 * lane;
      UChar bx    = (UChar)(xx >> shift);
      UChar by    = (UChar)(yy >> shift);
      sum += (UInt)abdU8( bx, by );
   }
   return (ULong)(sum & 0xFFFF);
}
2702 
2703 
2704 /*---------------------------------------------------------------*/
2705 /*--- Helpers for dealing with segment overrides.             ---*/
2706 /*---------------------------------------------------------------*/
2707 
2708 static inline
get_segdescr_base(VexGuestX86SegDescr * ent)2709 UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
2710 {
2711    UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
2712    UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
2713    UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
2714    return (hi << 24) | (mid << 16) | lo;
2715 }
2716 
2717 static inline
get_segdescr_limit(VexGuestX86SegDescr * ent)2718 UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
2719 {
2720     UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
2721     UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
2722     UInt limit = (hi << 16) | lo;
2723     if (ent->LdtEnt.Bits.Granularity)
2724        limit = (limit << 12) | 0xFFF;
2725     return limit;
2726 }
2727 
2728 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
x86g_use_seg_selector(HWord ldt,HWord gdt,UInt seg_selector,UInt virtual_addr)2729 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2730                               UInt seg_selector, UInt virtual_addr )
2731 {
2732    UInt tiBit, base, limit;
2733    VexGuestX86SegDescr* the_descrs;
2734 
2735    Bool verboze = False;
2736 
2737    /* If this isn't true, we're in Big Trouble. */
2738    vassert(8 == sizeof(VexGuestX86SegDescr));
2739 
2740    if (verboze)
2741       vex_printf("x86h_use_seg_selector: "
2742                  "seg_selector = 0x%x, vaddr = 0x%x\n",
2743                  seg_selector, virtual_addr);
2744 
2745    /* Check for wildly invalid selector. */
2746    if (seg_selector & ~0xFFFF)
2747       goto bad;
2748 
2749    seg_selector &= 0x0000FFFF;
2750 
2751    /* Sanity check the segment selector.  Ensure that RPL=11b (least
2752       privilege).  This forms the bottom 2 bits of the selector. */
2753    if ((seg_selector & 3) != 3)
2754       goto bad;
2755 
2756    /* Extract the TI bit (0 means GDT, 1 means LDT) */
2757    tiBit = (seg_selector >> 2) & 1;
2758 
2759    /* Convert the segment selector onto a table index */
2760    seg_selector >>= 3;
2761    vassert(seg_selector >= 0 && seg_selector < 8192);
2762 
2763    if (tiBit == 0) {
2764 
2765       /* GDT access. */
2766       /* Do we actually have a GDT to look at? */
2767       if (gdt == 0)
2768          goto bad;
2769 
2770       /* Check for access to non-existent entry. */
2771       if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
2772          goto bad;
2773 
2774       the_descrs = (VexGuestX86SegDescr*)gdt;
2775       base  = get_segdescr_base (&the_descrs[seg_selector]);
2776       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2777 
2778    } else {
2779 
2780       /* All the same stuff, except for the LDT. */
2781       if (ldt == 0)
2782          goto bad;
2783 
2784       if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
2785          goto bad;
2786 
2787       the_descrs = (VexGuestX86SegDescr*)ldt;
2788       base  = get_segdescr_base (&the_descrs[seg_selector]);
2789       limit = get_segdescr_limit(&the_descrs[seg_selector]);
2790 
2791    }
2792 
2793    /* Do the limit check.  Note, this check is just slightly too
2794       slack.  Really it should be "if (virtual_addr + size - 1 >=
2795       limit)," but we don't have the size info to hand.  Getting it
2796       could be significantly complex.  */
2797    if (virtual_addr >= limit)
2798       goto bad;
2799 
2800    if (verboze)
2801       vex_printf("x86h_use_seg_selector: "
2802                  "base = 0x%x, addr = 0x%x\n",
2803                  base, base + virtual_addr);
2804 
2805    /* High 32 bits are zero, indicating success. */
2806    return (ULong)( ((UInt)virtual_addr) + base );
2807 
2808  bad:
2809    return 1ULL << 32;
2810 }
2811 
2812 
2813 /*---------------------------------------------------------------*/
2814 /*--- Helpers for dealing with, and describing,               ---*/
2815 /*--- guest state as a whole.                                 ---*/
2816 /*---------------------------------------------------------------*/
2817 
2818 /* Initialise the entire x86 guest state. */
2819 /* VISIBLE TO LIBVEX CLIENT */
/* Put every field of the x86 guest state written here into its
   initial value: zero for almost everything, X86G_CC_OP_COPY for the
   flags-thunk operation, 1 for the direction flag, round-to-nearest
   for SSE, and EmNote_NONE for the emulation note.  The x87 part is
   delegated to x86g_dirtyhelper_FINIT. */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   /* Zero all four 32-bit lanes of one XMM register. */
#  define ZERO_XMM(_reg)                                  \
      do { (_reg)[0] = 0; (_reg)[1] = 0;                  \
           (_reg)[2] = 0; (_reg)[3] = 0;                  \
      } while (0)

   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER  = 0;

   /* Integer registers. */
   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   /* Flags thunk and the separately-held flag bits. */
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   ZERO_XMM(vex_state->guest_XMM0);
   ZERO_XMM(vex_state->guest_XMM1);
   ZERO_XMM(vex_state->guest_XMM2);
   ZERO_XMM(vex_state->guest_XMM3);
   ZERO_XMM(vex_state->guest_XMM4);
   ZERO_XMM(vex_state->guest_XMM5);
   ZERO_XMM(vex_state->guest_XMM6);
   ZERO_XMM(vex_state->guest_XMM7);

   /* Segment registers and descriptor-table addresses. */
   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR        = 0;
   vex_state->guest_SC_CLASS      = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;

#  undef ZERO_XMM
}
2883 
2884 
2885 /* Figure out if any part of the guest state contained in minoff
2886    .. maxoff requires precise memory exceptions.  If in doubt return
2887    True (but this generates significantly slower code).
2888 
2889    By default we enforce precise exns for guest %ESP, %EBP and %EIP
2890    only.  These are the minimum needed to extract correct stack
2891    backtraces from x86 code.
2892 
2893    Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
2894 */
guest_x86_state_requires_precise_mem_exns(Int minoff,Int maxoff,VexRegisterUpdates pxControl)2895 Bool guest_x86_state_requires_precise_mem_exns (
2896         Int minoff, Int maxoff, VexRegisterUpdates pxControl
2897      )
2898 {
2899    Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
2900    Int ebp_max = ebp_min + 4 - 1;
2901    Int esp_min = offsetof(VexGuestX86State, guest_ESP);
2902    Int esp_max = esp_min + 4 - 1;
2903    Int eip_min = offsetof(VexGuestX86State, guest_EIP);
2904    Int eip_max = eip_min + 4 - 1;
2905 
2906    if (maxoff < esp_min || minoff > esp_max) {
2907       /* no overlap with esp */
2908       if (pxControl == VexRegUpdSpAtMemAccess)
2909          return False; // We only need to check stack pointer.
2910    } else {
2911       return True;
2912    }
2913 
2914    if (maxoff < ebp_min || minoff > ebp_max) {
2915       /* no overlap with ebp */
2916    } else {
2917       return True;
2918    }
2919 
2920    if (maxoff < eip_min || minoff > eip_max) {
2921       /* no overlap with eip */
2922    } else {
2923       return True;
2924    }
2925 
2926    return False;
2927 }
2928 
2929 
2930 #define ALWAYSDEFD(field)                           \
2931     { offsetof(VexGuestX86State, field),            \
2932       (sizeof ((VexGuestX86State*)0)->field) }
2933 
2934 VexGuestLayout
2935    x86guest_layout
2936       = {
2937           /* Total size of the guest state, in bytes. */
2938           .total_sizeB = sizeof(VexGuestX86State),
2939 
2940           /* Describe the stack pointer. */
2941           .offset_SP = offsetof(VexGuestX86State,guest_ESP),
2942           .sizeof_SP = 4,
2943 
2944           /* Describe the frame pointer. */
2945           .offset_FP = offsetof(VexGuestX86State,guest_EBP),
2946           .sizeof_FP = 4,
2947 
2948           /* Describe the instruction pointer. */
2949           .offset_IP = offsetof(VexGuestX86State,guest_EIP),
2950           .sizeof_IP = 4,
2951 
2952           /* Describe any sections to be regarded by Memcheck as
2953              'always-defined'. */
2954           .n_alwaysDefd = 24,
2955 
2956           /* flags thunk: OP and NDEP are always defd, whereas DEP1
2957              and DEP2 have to be tracked.  See detailed comment in
2958              gdefs.h on meaning of thunk fields. */
2959           .alwaysDefd
2960              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
2961                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
2962                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
2963                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
2964                  /*  4 */ ALWAYSDEFD(guest_ACFLAG),
2965                  /*  5 */ ALWAYSDEFD(guest_EIP),
2966                  /*  6 */ ALWAYSDEFD(guest_FTOP),
2967                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
2968                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
2969                  /*  9 */ ALWAYSDEFD(guest_FC3210),
2970                  /* 10 */ ALWAYSDEFD(guest_CS),
2971                  /* 11 */ ALWAYSDEFD(guest_DS),
2972                  /* 12 */ ALWAYSDEFD(guest_ES),
2973                  /* 13 */ ALWAYSDEFD(guest_FS),
2974                  /* 14 */ ALWAYSDEFD(guest_GS),
2975                  /* 15 */ ALWAYSDEFD(guest_SS),
2976                  /* 16 */ ALWAYSDEFD(guest_LDT),
2977                  /* 17 */ ALWAYSDEFD(guest_GDT),
2978                  /* 18 */ ALWAYSDEFD(guest_EMNOTE),
2979                  /* 19 */ ALWAYSDEFD(guest_SSEROUND),
2980                  /* 20 */ ALWAYSDEFD(guest_CMSTART),
2981                  /* 21 */ ALWAYSDEFD(guest_CMLEN),
2982                  /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
2983                  /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
2984                }
2985         };
2986 
2987 
2988 /*---------------------------------------------------------------*/
2989 /*--- end                                 guest_x86_helpers.c ---*/
2990 /*---------------------------------------------------------------*/
2991