1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                             guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2011 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emwarn.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "guest_generic_bb_to_IR.h"
44 #include "guest_amd64_defs.h"
45 #include "guest_generic_x87.h"
46 
47 
48 /* This file contains helper functions for amd64 guest code.
49    Calls to these functions are generated by the back end.
50    These calls are of course in the host machine code and
51    this file will be compiled to host machine code, so that
52    all makes sense.
53 
54    Only change the signatures of these helper functions very
55    carefully.  If you change the signature here, you'll have to change
56    the parameters passed to it in the IR calls constructed by
57    guest-amd64/toIR.c.
58 
59    The convention used is that all functions called from generated
60    code are named amd64g_<something>, and any function whose name lacks
61    that prefix is not called from generated code.  Note that some
62    LibVEX_* functions can however be called by VEX's client, but that
63    is not the same as calling them from VEX-generated code.
64 */
65 
66 
67 /* Set to 1 to get detailed profiling info about use of the flag
68    machinery. */
69 #define PROFILE_RFLAGS 0
70 
71 
72 /*---------------------------------------------------------------*/
73 /*--- %rflags run-time helpers.                               ---*/
74 /*---------------------------------------------------------------*/
75 
76 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
77    after imulq/mulq. */
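/* For reference: writing u = u1*2^32 + u0 and v = v1*2^32 + v0, the full
   product is u*v = u1*v1*2^64 + (u1*v0 + u0*v1)*2^32 + u0*v0.  The two
   helpers below accumulate these 32x32-bit partial products, carrying each
   2^32 column's overflow upwards, so *rHi receives the top 64 bits of the
   128-bit result while *rLo is simply the ordinary truncated product. */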
78 
79 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
80 {
81    ULong u0, v0, w0;
82     Long u1, v1, w1, w2, t;
83    u0   = u & 0xFFFFFFFFULL;
84    u1   = u >> 32;
85    v0   = v & 0xFFFFFFFFULL;
86    v1   = v >> 32;
87    w0   = u0 * v0;
88    t    = u1 * v0 + (w0 >> 32);
89    w1   = t & 0xFFFFFFFFULL;
90    w2   = t >> 32;
91    w1   = u0 * v1 + w1;
92    *rHi = u1 * v1 + w2 + (w1 >> 32);
93    *rLo = u * v;
94 }
95 
96 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
97 {
98    ULong u0, v0, w0;
99    ULong u1, v1, w1, w2, t;
100    u0   = u & 0xFFFFFFFFULL;
101    u1   = u >> 32;
102    v0   = v & 0xFFFFFFFFULL;
103    v1   = v >> 32;
104    w0   = u0 * v0;
105    t    = u1 * v0 + (w0 >> 32);
106    w1   = t & 0xFFFFFFFFULL;
107    w2   = t >> 32;
108    w1   = u0 * v1 + w1;
109    *rHi = u1 * v1 + w2 + (w1 >> 32);
110    *rLo = u * v;
111 }
112 
113 
114 static const UChar parity_table[256] = {
115     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
116     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
117     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
119     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
120     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
121     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
123     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
125     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
127     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
128     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
129     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
131     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
132     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
133     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
135     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
136     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
137     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
139     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
141     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
143     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
144     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
145     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
147 };
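/* An entry in parity_table is AMD64G_CC_MASK_P exactly when its index has an
   even number of 1 bits, matching the x86 PF definition (parity of the low
   8 bits of a result).  A sketch that would regenerate it, assuming a
   GCC-style __builtin_popcount is available:

      for (i = 0; i < 256; i++)
         parity_table[i] = (__builtin_popcount(i) & 1) ? 0 : AMD64G_CC_MASK_P;
*/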
148 
149 /* generalised left-shifter */
150 static inline Long lshift ( Long x, Int n )
151 {
152    if (n >= 0)
153       return x << n;
154    else
155       return x >> (-n);
156 }
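/* Usage note: the ACTIONS_* macros below rely on the negative-count case.
   For example, lshift(res, 8 - DATA_BITS) moves the sign bit of a
   DATA_BITS-wide result into bit 7 (the SF position in %rflags), and
   lshift(..., 12 - DATA_BITS) moves the overflow condition into bit 11
   (the OF position); for the wider sizes those counts are negative and the
   value is shifted right instead. */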
157 
158 /* identity on ULong */
159 static inline ULong idULong ( ULong x )
160 {
161    return x;
162 }
163 
164 
165 #define PREAMBLE(__data_bits)					\
166    /* const */ ULong DATA_MASK 					\
167       = __data_bits==8                                          \
168            ? 0xFFULL 					        \
169            : (__data_bits==16                                   \
170                 ? 0xFFFFULL 		                        \
171                 : (__data_bits==32                              \
172                      ? 0xFFFFFFFFULL                            \
173                      : 0xFFFFFFFFFFFFFFFFULL));                 \
174    /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
175    /* const */ ULong CC_DEP1 = cc_dep1_formal;			\
176    /* const */ ULong CC_DEP2 = cc_dep2_formal;			\
177    /* const */ ULong CC_NDEP = cc_ndep_formal;			\
178    /* Four bogus assignments, which hopefully gcc can     */	\
179    /* optimise away, and which stop it complaining about  */	\
180    /* unused variables.                                   */	\
181    SIGN_MASK = SIGN_MASK;					\
182    DATA_MASK = DATA_MASK;					\
183    CC_DEP2 = CC_DEP2;						\
184    CC_NDEP = CC_NDEP;
185 
186 
187 /*-------------------------------------------------------------*/
188 
189 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
190 {								\
191    PREAMBLE(DATA_BITS);						\
192    { Long cf, pf, af, zf, sf, of;				\
193      Long argL, argR, res;					\
194      argL = CC_DEP1;						\
195      argR = CC_DEP2;						\
196      res  = argL + argR;					\
197      cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
198      pf = parity_table[(UChar)res];				\
199      af = (res ^ argL ^ argR) & 0x10;				\
200      zf = ((DATA_UTYPE)res == 0) << 6;				\
201      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
202      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
203                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
204      return cf | pf | af | zf | sf | of;			\
205    }								\
206 }
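/* Overflow note for ACTIONS_ADD: (argL ^ argR ^ -1) & (argL ^ res) has bit
   DATA_BITS-1 set exactly when argL and argR have the same sign bit but res
   has the opposite one, i.e. the signed-overflow condition for addition;
   lshift then moves that bit into position 11, the OF bit.  ACTIONS_SUB
   below uses the analogous condition (argL ^ argR) & (argL ^ res):
   operands of differing sign with the result's sign differing from argL. */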
207 
208 /*-------------------------------------------------------------*/
209 
210 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
211 {								\
212    PREAMBLE(DATA_BITS);						\
213    { Long cf, pf, af, zf, sf, of;				\
214      Long argL, argR, res;					\
215      argL = CC_DEP1;						\
216      argR = CC_DEP2;						\
217      res  = argL - argR;					\
218      cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
219      pf = parity_table[(UChar)res];				\
220      af = (res ^ argL ^ argR) & 0x10;				\
221      zf = ((DATA_UTYPE)res == 0) << 6;				\
222      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
223      of = lshift((argL ^ argR) & (argL ^ res),	 		\
224                  12 - DATA_BITS) & AMD64G_CC_MASK_O; 		\
225      return cf | pf | af | zf | sf | of;			\
226    }								\
227 }
228 
229 /*-------------------------------------------------------------*/
230 
231 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
232 {								\
233    PREAMBLE(DATA_BITS);						\
234    { Long cf, pf, af, zf, sf, of;				\
235      Long argL, argR, oldC, res;		 		\
236      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
237      argL = CC_DEP1;						\
238      argR = CC_DEP2 ^ oldC;	       				\
239      res  = (argL + argR) + oldC;				\
240      if (oldC)							\
241         cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
242      else							\
243         cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
244      pf = parity_table[(UChar)res];				\
245      af = (res ^ argL ^ argR) & 0x10;				\
246      zf = ((DATA_UTYPE)res == 0) << 6;				\
247      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
248      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
249                   12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
250      return cf | pf | af | zf | sf | of;			\
251    }								\
252 }
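/* Note on ACTIONS_ADC and ACTIONS_SBB: the thunk's DEP2 field holds the
   second operand xor'd with the old carry bit, so "argR = CC_DEP2 ^ oldC"
   recovers the real argR -- this appears to be how the IR-generation side
   (guest-amd64/toIR.c) fills in the thunk for these ops.  The carry-out
   test also has to account for the incoming carry, hence the oldC-dependent
   <= versus < comparisons. */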
253 
254 /*-------------------------------------------------------------*/
255 
256 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
257 {								\
258    PREAMBLE(DATA_BITS);						\
259    { Long cf, pf, af, zf, sf, of;				\
260      Long argL, argR, oldC, res;	       			\
261      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
262      argL = CC_DEP1;						\
263      argR = CC_DEP2 ^ oldC;	       				\
264      res  = (argL - argR) - oldC;				\
265      if (oldC)							\
266         cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
267      else							\
268         cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
269      pf = parity_table[(UChar)res];				\
270      af = (res ^ argL ^ argR) & 0x10;				\
271      zf = ((DATA_UTYPE)res == 0) << 6;				\
272      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
273      of = lshift((argL ^ argR) & (argL ^ res), 			\
274                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
275      return cf | pf | af | zf | sf | of;			\
276    }								\
277 }
278 
279 /*-------------------------------------------------------------*/
280 
281 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
282 {								\
283    PREAMBLE(DATA_BITS);						\
284    { Long cf, pf, af, zf, sf, of;				\
285      cf = 0;							\
286      pf = parity_table[(UChar)CC_DEP1];				\
287      af = 0;							\
288      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
289      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
290      of = 0;							\
291      return cf | pf | af | zf | sf | of;			\
292    }								\
293 }
294 
295 /*-------------------------------------------------------------*/
296 
297 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
298 {								\
299    PREAMBLE(DATA_BITS);						\
300    { Long cf, pf, af, zf, sf, of;				\
301      Long argL, argR, res;					\
302      res  = CC_DEP1;						\
303      argL = res - 1;						\
304      argR = 1;							\
305      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
306      pf = parity_table[(UChar)res];				\
307      af = (res ^ argL ^ argR) & 0x10;				\
308      zf = ((DATA_UTYPE)res == 0) << 6;				\
309      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
310      of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
311      return cf | pf | af | zf | sf | of;			\
312    }								\
313 }
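/* Note on ACTIONS_INC and ACTIONS_DEC: inc/dec leave CF unchanged, so the
   old carry is passed in via CC_NDEP rather than recomputed.  OF is set
   exactly when the result is the minimum (for inc) or maximum (for dec)
   signed value of the data size, which is what the SIGN_MASK comparisons
   express. */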
314 
315 /*-------------------------------------------------------------*/
316 
317 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
318 {								\
319    PREAMBLE(DATA_BITS);						\
320    { Long cf, pf, af, zf, sf, of;				\
321      Long argL, argR, res;					\
322      res  = CC_DEP1;						\
323      argL = res + 1;						\
324      argR = 1;							\
325      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
326      pf = parity_table[(UChar)res];				\
327      af = (res ^ argL ^ argR) & 0x10;				\
328      zf = ((DATA_UTYPE)res == 0) << 6;				\
329      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
330      of = ((res & DATA_MASK) 					\
331           == ((ULong)SIGN_MASK - 1)) << 11;			\
332      return cf | pf | af | zf | sf | of;			\
333    }								\
334 }
335 
336 /*-------------------------------------------------------------*/
337 
338 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
339 {								\
340    PREAMBLE(DATA_BITS);						\
341    { Long cf, pf, af, zf, sf, of;				\
342      cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;	\
343      pf = parity_table[(UChar)CC_DEP1];				\
344      af = 0; /* undefined */					\
345      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
346      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
347      /* of is defined if shift count == 1 */			\
348      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
349           & AMD64G_CC_MASK_O;					\
350      return cf | pf | af | zf | sf | of;			\
351    }								\
352 }
353 
354 /*-------------------------------------------------------------*/
355 
356 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
357 {								\
358    PREAMBLE(DATA_BITS);  					\
359    { Long cf, pf, af, zf, sf, of;				\
360      cf = CC_DEP2 & 1;						\
361      pf = parity_table[(UChar)CC_DEP1];				\
362      af = 0; /* undefined */					\
363      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
364      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
365      /* of is defined if shift count == 1 */			\
366      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
367           & AMD64G_CC_MASK_O;					\
368      return cf | pf | af | zf | sf | of;			\
369    }								\
370 }
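/* Note on ACTIONS_SHL and ACTIONS_SHR: for shifts, DEP1 is the final result
   and DEP2 is (as arranged on the IR-generation side) the value shifted by
   one bit less than the requested count, so the last bit shifted out --
   which becomes CF -- is the top bit of DEP2 for SHL and the bottom bit for
   SHR.  The OF expressions are only meaningful for a shift count of 1, as
   the comments above note. */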
371 
372 /*-------------------------------------------------------------*/
373 
374 /* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
375 /* DEP1 = result, NDEP = old flags */
376 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
377 {								\
378    PREAMBLE(DATA_BITS);						\
379    { Long fl 							\
380         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
381           | (AMD64G_CC_MASK_C & CC_DEP1)			\
382           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,  		\
383                                       11-(DATA_BITS-1)) 	\
384                      ^ lshift(CC_DEP1, 11)));			\
385      return fl;							\
386    }								\
387 }
388 
389 /*-------------------------------------------------------------*/
390 
391 /* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
392 /* DEP1 = result, NDEP = old flags */
393 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
394 {								\
395    PREAMBLE(DATA_BITS);						\
396    { Long fl 							\
397         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
398           | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
399           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, 		\
400                                       11-(DATA_BITS-1)) 	\
401                      ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
402      return fl;							\
403    }								\
404 }
405 
406 /*-------------------------------------------------------------*/
407 
408 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
409                                 DATA_U2TYPE, NARROWto2U)        \
410 {                                                               \
411    PREAMBLE(DATA_BITS);                                         \
412    { Long cf, pf, af, zf, sf, of;                               \
413      DATA_UTYPE  hi;                                            \
414      DATA_UTYPE  lo                                             \
415         = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
416                      * ((DATA_UTYPE)CC_DEP2) );                 \
417      DATA_U2TYPE rr                                             \
418         = NARROWto2U(                                           \
419              ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
420              * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
421      hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
422      cf = (hi != 0);                                            \
423      pf = parity_table[(UChar)lo];                              \
424      af = 0; /* undefined */                                    \
425      zf = (lo == 0) << 6;                                       \
426      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
427      of = cf << 11;                                             \
428      return cf | pf | af | zf | sf | of;                        \
429    }								\
430 }
431 
432 /*-------------------------------------------------------------*/
433 
434 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
435                                 DATA_S2TYPE, NARROWto2S)        \
436 {                                                               \
437    PREAMBLE(DATA_BITS);                                         \
438    { Long cf, pf, af, zf, sf, of;                               \
439      DATA_STYPE  hi;                                            \
440      DATA_STYPE  lo                                             \
441         = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
442                      * ((DATA_STYPE)CC_DEP2) );                 \
443      DATA_S2TYPE rr                                             \
444         = NARROWto2S(                                           \
445              ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
446              * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
447      hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
448      cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
449      pf = parity_table[(UChar)lo];                              \
450      af = 0; /* undefined */                                    \
451      zf = (lo == 0) << 6;                                       \
452      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
453      of = cf << 11;                                             \
454      return cf | pf | af | zf | sf | of;                        \
455    }								\
456 }
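/* Note on the UMUL/SMUL cases: CF and OF are set together, and indicate
   that the full-width product does not fit in DATA_BITS bits -- for the
   unsigned case, the high half is nonzero; for the signed case, the high
   half is not simply the sign-extension of the low half.  This mirrors the
   architectural behaviour of MUL and IMUL. */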
457 
458 /*-------------------------------------------------------------*/
459 
460 #define ACTIONS_UMULQ                                           \
461 {                                                               \
462    PREAMBLE(64);                                                \
463    { Long cf, pf, af, zf, sf, of;                               \
464      ULong lo, hi;                                              \
465      mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
466      cf = (hi != 0);                                            \
467      pf = parity_table[(UChar)lo];                              \
468      af = 0; /* undefined */                                    \
469      zf = (lo == 0) << 6;                                       \
470      sf = lshift(lo, 8 - 64) & 0x80;                            \
471      of = cf << 11;                                             \
472      return cf | pf | af | zf | sf | of;                        \
473    }								\
474 }
475 
476 /*-------------------------------------------------------------*/
477 
478 #define ACTIONS_SMULQ                                           \
479 {                                                               \
480    PREAMBLE(64);                                                \
481    { Long cf, pf, af, zf, sf, of;                               \
482      Long lo, hi;                                               \
483      mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
484      cf = (hi != (lo >>/*s*/ (64-1)));                          \
485      pf = parity_table[(UChar)lo];                              \
486      af = 0; /* undefined */                                    \
487      zf = (lo == 0) << 6;                                       \
488      sf = lshift(lo, 8 - 64) & 0x80;                            \
489      of = cf << 11;                                             \
490      return cf | pf | af | zf | sf | of;                        \
491    }								\
492 }
493 
494 
495 #if PROFILE_RFLAGS
496 
497 static Bool initted     = False;
498 
499 /* C flag, fast route */
500 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
501 /* C flag, slow route */
502 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
503 /* table for calculate_cond */
504 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
505 /* total entry counts for calc_all, calc_c, calc_cond. */
506 static UInt n_calc_all  = 0;
507 static UInt n_calc_c    = 0;
508 static UInt n_calc_cond = 0;
509 
510 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
511 
512 
513 static void showCounts ( void )
514 {
515    Int op, co;
516    Char ch;
517    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
518               n_calc_all, n_calc_cond, n_calc_c);
519 
520    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
521               "    S   NS    P   NP    L   NL   LE  NLE\n");
522    vex_printf("     -----------------------------------------------------"
523               "----------------------------------------\n");
524    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
525 
526       ch = ' ';
527       if (op > 0 && (op-1) % 4 == 0)
528          ch = 'B';
529       if (op > 0 && (op-1) % 4 == 1)
530          ch = 'W';
531       if (op > 0 && (op-1) % 4 == 2)
532          ch = 'L';
533       if (op > 0 && (op-1) % 4 == 3)
534          ch = 'Q';
535 
536       vex_printf("%2d%c: ", op, ch);
537       vex_printf("%6u ", tabc_slow[op]);
538       vex_printf("%6u ", tabc_fast[op]);
539       for (co = 0; co < 16; co++) {
540          Int n = tab_cond[op][co];
541          if (n >= 1000) {
542             vex_printf(" %3dK", n / 1000);
543          } else
544          if (n >= 0) {
545             vex_printf(" %3d ", n );
546          } else {
547             vex_printf("     ");
548          }
549       }
550       vex_printf("\n");
551    }
552    vex_printf("\n");
553 }
554 
555 static void initCounts ( void )
556 {
557    Int op, co;
558    initted = True;
559    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
560       tabc_fast[op] = tabc_slow[op] = 0;
561       for (co = 0; co < 16; co++)
562          tab_cond[op][co] = 0;
563    }
564 }
565 
566 #endif /* PROFILE_RFLAGS */
567 
568 
569 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
570 /* Calculate all 6 flags from the supplied thunk parameters.
571    Worker function, not directly called from generated code. */
572 static
573 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
574                                         ULong cc_dep1_formal,
575                                         ULong cc_dep2_formal,
576                                         ULong cc_ndep_formal )
577 {
578    switch (cc_op) {
579       case AMD64G_CC_OP_COPY:
580          return cc_dep1_formal
581                 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
582                    | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
583 
584       case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
585       case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
586       case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
587       case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );
588 
589       case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
590       case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
591       case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
592       case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );
593 
594       case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
595       case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
596       case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
597       case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );
598 
599       case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
600       case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
601       case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
602       case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );
603 
604       case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
605       case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
606       case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
607       case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );
608 
609       case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
610       case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
611       case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
612       case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );
613 
614       case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
615       case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
616       case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
617       case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );
618 
619       case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
620       case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
621       case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
622       case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );
623 
624       case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
625       case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
626       case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
627       case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );
628 
629       case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
630       case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
631       case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
632       case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );
633 
634       case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
635       case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
636       case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
637       case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );
638 
639       case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
640                                                   UShort, toUShort );
641       case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
642                                                   UInt,   toUInt );
643       case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
644                                                   ULong,  idULong );
645 
646       case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;
647 
648       case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
649                                                   Short,  toUShort );
650       case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
651                                                   Int,    toUInt   );
652       case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
653                                                   Long,   idULong );
654 
655       case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;
656 
657       default:
658          /* shouldn't really make these calls from generated code */
659          vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
660                     "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
661                     cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
662          vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
663    }
664 }
665 
666 
667 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
668 /* Calculate all 6 flags from the supplied thunk parameters. */
669 ULong amd64g_calculate_rflags_all ( ULong cc_op,
670                                     ULong cc_dep1,
671                                     ULong cc_dep2,
672                                     ULong cc_ndep )
673 {
674 #  if PROFILE_RFLAGS
675    if (!initted) initCounts();
676    n_calc_all++;
677    if (SHOW_COUNTS_NOW) showCounts();
678 #  endif
679    return
680       amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
681 }
682 
683 
684 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
685 /* Calculate just the carry flag from the supplied thunk parameters. */
686 ULong amd64g_calculate_rflags_c ( ULong cc_op,
687                                   ULong cc_dep1,
688                                   ULong cc_dep2,
689                                   ULong cc_ndep )
690 {
691 #  if PROFILE_RFLAGS
692    if (!initted) initCounts();
693    n_calc_c++;
694    tabc_fast[cc_op]++;
695    if (SHOW_COUNTS_NOW) showCounts();
696 #  endif
697 
698    /* Fast-case some common ones. */
699    switch (cc_op) {
700       case AMD64G_CC_OP_COPY:
701          return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
702       case AMD64G_CC_OP_LOGICQ:
703       case AMD64G_CC_OP_LOGICL:
704       case AMD64G_CC_OP_LOGICW:
705       case AMD64G_CC_OP_LOGICB:
706          return 0;
707 	 //      case AMD64G_CC_OP_SUBL:
708 	 //         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
709 	 //                   ? AMD64G_CC_MASK_C : 0;
710 	 //      case AMD64G_CC_OP_SUBW:
711 	 //         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
712 	 //                   ? AMD64G_CC_MASK_C : 0;
713 	 //      case AMD64G_CC_OP_SUBB:
714 	 //         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
715 	 //                   ? AMD64G_CC_MASK_C : 0;
716 	 //      case AMD64G_CC_OP_INCL:
717 	 //      case AMD64G_CC_OP_DECL:
718 	 //         return cc_ndep & AMD64G_CC_MASK_C;
719       default:
720          break;
721    }
722 
723 #  if PROFILE_RFLAGS
724    tabc_fast[cc_op]--;
725    tabc_slow[cc_op]++;
726 #  endif
727 
728    return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
729           & AMD64G_CC_MASK_C;
730 }
731 
732 
733 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
734 /* returns 1 or 0 */
735 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
736                                    ULong cc_op,
737                                    ULong cc_dep1,
738                                    ULong cc_dep2,
739                                    ULong cc_ndep )
740 {
741    ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
742                                                   cc_dep2, cc_ndep);
743    ULong of,sf,zf,cf,pf;
744    ULong inv = cond & 1;
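   /* The AMD64Cond values come in complementary pairs (O/NO, Z/NZ, ...),
      with the low bit distinguishing a condition from its negation, so each
      pair is handled by a single case below and "inv" flips the result for
      the negated form. */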
745 
746 #  if PROFILE_RFLAGS
747    if (!initted) initCounts();
748    tab_cond[cc_op][cond]++;
749    n_calc_cond++;
750    if (SHOW_COUNTS_NOW) showCounts();
751 #  endif
752 
753    switch (cond) {
754       case AMD64CondNO:
755       case AMD64CondO: /* OF == 1 */
756          of = rflags >> AMD64G_CC_SHIFT_O;
757          return 1 & (inv ^ of);
758 
759       case AMD64CondNZ:
760       case AMD64CondZ: /* ZF == 1 */
761          zf = rflags >> AMD64G_CC_SHIFT_Z;
762          return 1 & (inv ^ zf);
763 
764       case AMD64CondNB:
765       case AMD64CondB: /* CF == 1 */
766          cf = rflags >> AMD64G_CC_SHIFT_C;
767          return 1 & (inv ^ cf);
768          break;
769 
770       case AMD64CondNBE:
771       case AMD64CondBE: /* (CF or ZF) == 1 */
772          cf = rflags >> AMD64G_CC_SHIFT_C;
773          zf = rflags >> AMD64G_CC_SHIFT_Z;
774          return 1 & (inv ^ (cf | zf));
775          break;
776 
777       case AMD64CondNS:
778       case AMD64CondS: /* SF == 1 */
779          sf = rflags >> AMD64G_CC_SHIFT_S;
780          return 1 & (inv ^ sf);
781 
782       case AMD64CondNP:
783       case AMD64CondP: /* PF == 1 */
784          pf = rflags >> AMD64G_CC_SHIFT_P;
785          return 1 & (inv ^ pf);
786 
787       case AMD64CondNL:
788       case AMD64CondL: /* (SF xor OF) == 1 */
789          sf = rflags >> AMD64G_CC_SHIFT_S;
790          of = rflags >> AMD64G_CC_SHIFT_O;
791          return 1 & (inv ^ (sf ^ of));
792          break;
793 
794       case AMD64CondNLE:
795       case AMD64CondLE: /* ((SF xor OF) or ZF)  == 1 */
796          sf = rflags >> AMD64G_CC_SHIFT_S;
797          of = rflags >> AMD64G_CC_SHIFT_O;
798          zf = rflags >> AMD64G_CC_SHIFT_Z;
799          return 1 & (inv ^ ((sf ^ of) | zf));
800          break;
801 
802       default:
803          /* shouldn't really make these calls from generated code */
804          vex_printf("amd64g_calculate_condition"
805                     "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
806                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
807          vpanic("amd64g_calculate_condition");
808    }
809 }
810 
811 
812 /* VISIBLE TO LIBVEX CLIENT */
813 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
814 {
815    ULong rflags = amd64g_calculate_rflags_all_WRK(
816                      vex_state->guest_CC_OP,
817                      vex_state->guest_CC_DEP1,
818                      vex_state->guest_CC_DEP2,
819                      vex_state->guest_CC_NDEP
820                   );
821    Long dflag = vex_state->guest_DFLAG;
822    vassert(dflag == 1 || dflag == -1);
823    if (dflag == -1)
824       rflags |= (1<<10);
825    if (vex_state->guest_IDFLAG == 1)
826       rflags |= (1<<21);
827    if (vex_state->guest_ACFLAG == 1)
828       rflags |= (1<<18);
829 
830    return rflags;
831 }
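/* (In the above, bit 10 is DF, bit 18 is AC and bit 21 is ID in %rflags,
   matching the guest_DFLAG/ACFLAG/IDFLAG fields they are taken from.) */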
832 
833 /* VISIBLE TO LIBVEX CLIENT */
834 void
835 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
836                                /*MOD*/VexGuestAMD64State* vex_state )
837 {
838    ULong oszacp = amd64g_calculate_rflags_all_WRK(
839                      vex_state->guest_CC_OP,
840                      vex_state->guest_CC_DEP1,
841                      vex_state->guest_CC_DEP2,
842                      vex_state->guest_CC_NDEP
843                   );
844    if (new_carry_flag & 1) {
845       oszacp |= AMD64G_CC_MASK_C;
846    } else {
847       oszacp &= ~AMD64G_CC_MASK_C;
848    }
849    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
850    vex_state->guest_CC_DEP1 = oszacp;
851    vex_state->guest_CC_DEP2 = 0;
852    vex_state->guest_CC_NDEP = 0;
853 }
854 
855 
856 /*---------------------------------------------------------------*/
857 /*--- %rflags translation-time function specialisers.         ---*/
858 /*--- These help iropt specialise calls the above run-time    ---*/
859 /*--- %rflags functions.                                      ---*/
860 /*---------------------------------------------------------------*/
861 
862 /* Used by the optimiser to try specialisations.  Returns an
863    equivalent expression, or NULL if none. */
864 
865 static Bool isU64 ( IRExpr* e, ULong n )
866 {
867    return toBool( e->tag == Iex_Const
868                   && e->Iex.Const.con->tag == Ico_U64
869                   && e->Iex.Const.con->Ico.U64 == n );
870 }
871 
872 IRExpr* guest_amd64_spechelper ( HChar* function_name,
873                                  IRExpr** args,
874                                  IRStmt** precedingStmts,
875                                  Int      n_precedingStmts )
876 {
877 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
878 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
879 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
880 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
881 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
882 
883    Int i, arity = 0;
884    for (i = 0; args[i]; i++)
885       arity++;
886 #  if 0
887    vex_printf("spec request:\n");
888    vex_printf("   %s  ", function_name);
889    for (i = 0; i < arity; i++) {
890       vex_printf("  ");
891       ppIRExpr(args[i]);
892    }
893    vex_printf("\n");
894 #  endif
895 
896    /* --------- specialising "amd64g_calculate_condition" --------- */
897 
898    if (vex_streq(function_name, "amd64g_calculate_condition")) {
899       /* specialise calls to above "calculate condition" function */
900       IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
901       vassert(arity == 5);
902       cond    = args[0];
903       cc_op   = args[1];
904       cc_dep1 = args[2];
905       cc_dep2 = args[3];
906 
907       /*---------------- ADDQ ----------------*/
908 
909       if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
910          /* long long add, then Z --> test (dst+src == 0) */
911          return unop(Iop_1Uto64,
912                      binop(Iop_CmpEQ64,
913                            binop(Iop_Add64, cc_dep1, cc_dep2),
914                            mkU64(0)));
915       }
916 
917       /*---------------- SUBQ ----------------*/
918 
919       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
920          /* long long sub/cmp, then Z --> test dst==src */
921          return unop(Iop_1Uto64,
922                      binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
923       }
924       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
925          /* long long sub/cmp, then NZ --> test dst!=src */
926          return unop(Iop_1Uto64,
927                      binop(Iop_CmpNE64,cc_dep1,cc_dep2));
928       }
929 
930       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
931          /* long long sub/cmp, then L (signed less than)
932             --> test dst <s src */
933          return unop(Iop_1Uto64,
934                      binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
935       }
936 
937       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
938          /* long long sub/cmp, then B (unsigned less than)
939             --> test dst <u src */
940          return unop(Iop_1Uto64,
941                      binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
942       }
943       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
944          /* long long sub/cmp, then NB (unsigned greater than or equal)
945             --> test src <=u dst */
946          /* Note, args are opposite way round from the usual */
947          return unop(Iop_1Uto64,
948                      binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
949       }
950 
951       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
952          /* long long sub/cmp, then BE (unsigned less than or equal)
953             --> test dst <=u src */
954          return unop(Iop_1Uto64,
955                      binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
956       }
957 
958       /*---------------- SUBL ----------------*/
959 
960       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
961          /* long sub/cmp, then Z --> test dst==src */
962          return unop(Iop_1Uto64,
963                      binop(Iop_CmpEQ32,
964                            unop(Iop_64to32, cc_dep1),
965                            unop(Iop_64to32, cc_dep2)));
966       }
967       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
968          /* long sub/cmp, then NZ --> test dst!=src */
969          return unop(Iop_1Uto64,
970                      binop(Iop_CmpNE32,
971                            unop(Iop_64to32, cc_dep1),
972                            unop(Iop_64to32, cc_dep2)));
973       }
974 
975       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
976          /* long sub/cmp, then L (signed less than)
977             --> test dst <s src */
978          return unop(Iop_1Uto64,
979                      binop(Iop_CmpLT32S,
980                            unop(Iop_64to32, cc_dep1),
981                            unop(Iop_64to32, cc_dep2)));
982       }
983 
984       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
985          /* long sub/cmp, then LE (signed less than or equal)
986             --> test dst <=s src */
987          return unop(Iop_1Uto64,
988                      binop(Iop_CmpLE32S,
989                            unop(Iop_64to32, cc_dep1),
990                            unop(Iop_64to32, cc_dep2)));
991 
992       }
993       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
994          /* long sub/cmp, then NLE (signed greater than)
995             --> test !(dst <=s src)
996             --> test (dst >s src)
997             --> test (src <s dst) */
998          return unop(Iop_1Uto64,
999                      binop(Iop_CmpLT32S,
1000                            unop(Iop_64to32, cc_dep2),
1001                            unop(Iop_64to32, cc_dep1)));
1002 
1003       }
1004 
1005       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1006          /* long sub/cmp, then BE (unsigned less than or equal)
1007             --> test dst <=u src */
1008          return unop(Iop_1Uto64,
1009                      binop(Iop_CmpLE32U,
1010                            unop(Iop_64to32, cc_dep1),
1011                            unop(Iop_64to32, cc_dep2)));
1012       }
1013       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1014          /* long sub/cmp, then NBE (unsigned greater than)
1015             --> test src <u dst */
1016          /* Note, args are opposite way round from the usual */
1017          return unop(Iop_1Uto64,
1018                      binop(Iop_CmpLT32U,
1019                            unop(Iop_64to32, cc_dep2),
1020                            unop(Iop_64to32, cc_dep1)));
1021       }
1022 
1023       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1024          /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
1025          return unop(Iop_1Uto64,
1026                      binop(Iop_CmpLT32S,
1027                            binop(Iop_Sub32,
1028                                  unop(Iop_64to32, cc_dep1),
1029                                  unop(Iop_64to32, cc_dep2)),
1030                            mkU32(0)));
1031       }
1032 
1033       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1034          /* long sub/cmp, then B (unsigned less than)
1035             --> test dst <u src */
1036          return unop(Iop_1Uto64,
1037                      binop(Iop_CmpLT32U,
1038                            unop(Iop_64to32, cc_dep1),
1039                            unop(Iop_64to32, cc_dep2)));
1040       }
1041 
1042       /*---------------- SUBW ----------------*/
1043 
1044       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1045          /* word sub/cmp, then Z --> test dst==src */
1046          return unop(Iop_1Uto64,
1047                      binop(Iop_CmpEQ16,
1048                            unop(Iop_64to16,cc_dep1),
1049                            unop(Iop_64to16,cc_dep2)));
1050       }
1051       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1052          /* word sub/cmp, then NZ --> test dst!=src */
1053          return unop(Iop_1Uto64,
1054                      binop(Iop_CmpNE16,
1055                            unop(Iop_64to16,cc_dep1),
1056                            unop(Iop_64to16,cc_dep2)));
1057       }
1058 
1059       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1060          /* word sub/cmp, then LE (signed less than or equal)
1061             --> test dst <=s src */
1062          return unop(Iop_1Uto64,
1063                      binop(Iop_CmpLE64S,
1064                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1065                            binop(Iop_Shl64,cc_dep2,mkU8(48))));
1066 
1067       }
1068 
1069       /*---------------- SUBB ----------------*/
1070 
1071       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1072          /* byte sub/cmp, then Z --> test dst==src */
1073          return unop(Iop_1Uto64,
1074                      binop(Iop_CmpEQ8,
1075                            unop(Iop_64to8,cc_dep1),
1076                            unop(Iop_64to8,cc_dep2)));
1077       }
1078       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1079          /* byte sub/cmp, then NZ --> test dst!=src */
1080          return unop(Iop_1Uto64,
1081                      binop(Iop_CmpNE8,
1082                            unop(Iop_64to8,cc_dep1),
1083                            unop(Iop_64to8,cc_dep2)));
1084       }
1085 
1086       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1087          /* byte sub/cmp, then BE (unsigned less than or equal)
1088             --> test dst <=u src */
1089          return unop(Iop_1Uto64,
1090                      binop(Iop_CmpLE64U,
1091                            binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1092                            binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1093       }
1094 
1095       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1096                                           && isU64(cc_dep2, 0)) {
1097          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1098                                          --> test dst <s 0
1099                                          --> (ULong)dst[7]
1100             This is yet another scheme by which gcc figures out if the
1101             top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
1102          /* Note: isU64(cc_dep2, 0) is correct, even though this is
1103             for an 8-bit comparison, since the args to the helper
1104             function are always U64s. */
1105          return binop(Iop_And64,
1106                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1107                       mkU64(1));
1108       }
1109       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1110                                           && isU64(cc_dep2, 0)) {
1111          /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1112                                           --> test !(dst <s 0)
1113                                           --> (ULong) !dst[7]
1114          */
1115          return binop(Iop_Xor64,
1116                       binop(Iop_And64,
1117                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1118                             mkU64(1)),
1119                       mkU64(1));
1120       }
1121 
1122       /*---------------- LOGICQ ----------------*/
1123 
1124       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1125          /* long long and/or/xor, then Z --> test dst==0 */
1126          return unop(Iop_1Uto64,
1127                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1128       }
1129       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1130          /* long long and/or/xor, then NZ --> test dst!=0 */
1131          return unop(Iop_1Uto64,
1132                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1133       }
1134 
1135       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1136          /* long long and/or/xor, then L
1137             LOGIC sets SF and ZF according to the
1138             result and makes OF be zero.  L computes SF ^ OF, but
1139             OF is zero, so this reduces to SF -- which will be 1 iff
1140             the result is < signed 0.  Hence ...
1141          */
1142          return unop(Iop_1Uto64,
1143                      binop(Iop_CmpLT64S,
1144                            cc_dep1,
1145                            mkU64(0)));
1146       }
1147 
1148       /*---------------- LOGICL ----------------*/
1149 
1150       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1151          /* long and/or/xor, then Z --> test dst==0 */
1152          return unop(Iop_1Uto64,
1153                      binop(Iop_CmpEQ32,
1154                            unop(Iop_64to32, cc_dep1),
1155                            mkU32(0)));
1156       }
1157       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1158          /* long and/or/xor, then NZ --> test dst!=0 */
1159          return unop(Iop_1Uto64,
1160                      binop(Iop_CmpNE32,
1161                            unop(Iop_64to32, cc_dep1),
1162                            mkU32(0)));
1163       }
1164 
1165       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1166          /* long and/or/xor, then LE
1167             This is pretty subtle.  LOGIC sets SF and ZF according to the
1168             result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
1169             OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1170             the result is <=signed 0.  Hence ...
1171          */
1172          return unop(Iop_1Uto64,
1173                      binop(Iop_CmpLE32S,
1174                            unop(Iop_64to32, cc_dep1),
1175                            mkU32(0)));
1176       }
1177 
1178       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1179          /* long and/or/xor, then S --> (ULong)result[31] */
1180          return binop(Iop_And64,
1181                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1182                       mkU64(1));
1183       }
1184       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1185          /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1186          return binop(Iop_Xor64,
1187                 binop(Iop_And64,
1188                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1189                       mkU64(1)),
1190                 mkU64(1));
1191       }
1192 
1193       /*---------------- LOGICB ----------------*/
1194 
1195       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1196          /* byte and/or/xor, then Z --> test dst==0 */
1197          return unop(Iop_1Uto64,
1198                      binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1199                                         mkU64(0)));
1200       }
1201       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1202          /* byte and/or/xor, then NZ --> test dst!=0 */
1203          return unop(Iop_1Uto64,
1204                      binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1205                                         mkU64(0)));
1206       }
1207 
1208       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1209          /* this is an idiom gcc sometimes uses to find out if the top
1210             bit of a byte register is set: eg testb %al,%al; js ..
1211             Since it just depends on the top bit of the byte, extract
1212             that bit and explicitly get rid of all the rest.  This
1213             helps memcheck avoid false positives in the case where any
1214             of the other bits in the byte are undefined. */
1215          /* byte and/or/xor, then S --> (UInt)result[7] */
1216          return binop(Iop_And64,
1217                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1218                       mkU64(1));
1219       }
1220       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1221          /* byte and/or/xor, then NS --> (UInt)!result[7] */
1222          return binop(Iop_Xor64,
1223                       binop(Iop_And64,
1224                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1225                             mkU64(1)),
1226                       mkU64(1));
1227       }
1228 
1229       /*---------------- INCB ----------------*/
1230 
1231       if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1232          /* 8-bit inc, then LE --> sign bit of the arg */
1233          return binop(Iop_And64,
1234                       binop(Iop_Shr64,
1235                             binop(Iop_Sub64, cc_dep1, mkU64(1)),
1236                             mkU8(7)),
1237                       mkU64(1));
1238       }
1239 
1240       /*---------------- INCW ----------------*/
1241 
1242       if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1243          /* 16-bit inc, then Z --> test dst == 0 */
1244          return unop(Iop_1Uto64,
1245                      binop(Iop_CmpEQ64,
1246                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1247                            mkU64(0)));
1248       }
1249 
1250       /*---------------- DECL ----------------*/
1251 
1252       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1253          /* dec L, then Z --> test dst == 0 */
1254          return unop(Iop_1Uto64,
1255                      binop(Iop_CmpEQ32,
1256                            unop(Iop_64to32, cc_dep1),
1257                            mkU32(0)));
1258       }
1259 
1260       /*---------------- DECW ----------------*/
1261 
1262       if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1263          /* 16-bit dec, then NZ --> test dst != 0 */
1264          return unop(Iop_1Uto64,
1265                      binop(Iop_CmpNE64,
1266                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1267                            mkU64(0)));
1268       }
1269 
1270       /*---------------- COPY ----------------*/
1271       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1272          jbe" for example. */
1273 
1274       if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1275           (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1276          /* COPY, then BE --> extract C and Z from dep1, and test (C
1277             or Z == 1). */
1278          /* COPY, then NBE --> extract C and Z from dep1, and test (C
1279             or Z == 0). */
1280          ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1281          return
1282             unop(
1283                Iop_1Uto64,
1284                binop(
1285                   Iop_CmpEQ64,
1286                   binop(
1287                      Iop_And64,
1288                      binop(
1289                         Iop_Or64,
1290                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1291                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1292                      ),
1293                      mkU64(1)
1294                   ),
1295                   mkU64(nnn)
1296                )
1297             );
1298       }
1299 
1300       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1301          /* COPY, then B --> extract C dep1, and test (C == 1). */
1302          return
1303             unop(
1304                Iop_1Uto64,
1305                binop(
1306                   Iop_CmpNE64,
1307                   binop(
1308                      Iop_And64,
1309                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1310                      mkU64(1)
1311                   ),
1312                   mkU64(0)
1313                )
1314             );
1315       }
1316 
1317       if (isU64(cc_op, AMD64G_CC_OP_COPY)
1318           && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1319          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1320          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1321          UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1322          return
1323             unop(
1324                Iop_1Uto64,
1325                binop(
1326                   Iop_CmpEQ64,
1327                   binop(
1328                      Iop_And64,
1329                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1330                      mkU64(1)
1331                   ),
1332                   mkU64(nnn)
1333                )
1334             );
1335       }
1336 
1337       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1338          /* COPY, then P --> extract P from dep1, and test (P == 1). */
1339          return
1340             unop(
1341                Iop_1Uto64,
1342                binop(
1343                   Iop_CmpNE64,
1344                   binop(
1345                      Iop_And64,
1346                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1347                      mkU64(1)
1348                   ),
1349                   mkU64(0)
1350                )
1351             );
1352       }
1353 
1354       return NULL;
1355    }
1356 
1357    /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1358 
1359    if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1360       /* specialise calls to above "calculate_rflags_c" function */
1361       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1362       vassert(arity == 4);
1363       cc_op   = args[0];
1364       cc_dep1 = args[1];
1365       cc_dep2 = args[2];
1366       cc_ndep = args[3];
1367 
1368       if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1369          /* C after sub denotes unsigned less than */
1370          return unop(Iop_1Uto64,
1371                      binop(Iop_CmpLT64U,
1372                            cc_dep1,
1373                            cc_dep2));
1374       }
1375       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1376          /* C after sub denotes unsigned less than */
1377          return unop(Iop_1Uto64,
1378                      binop(Iop_CmpLT32U,
1379                            unop(Iop_64to32, cc_dep1),
1380                            unop(Iop_64to32, cc_dep2)));
1381       }
1382       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1383          /* C after sub denotes unsigned less than */
1384          return unop(Iop_1Uto64,
1385                      binop(Iop_CmpLT64U,
1386                            binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1387                            binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1388       }
1389       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1390           || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1391           || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1392           || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1393          /* cflag after logic is zero */
1394          return mkU64(0);
1395       }
1396       if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1397           || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1398          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1399          return cc_ndep;
1400       }
1401 
1402 #     if 0
1403       if (cc_op->tag == Iex_Const) {
1404          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1405       }
1406 #     endif
1407 
1408       return NULL;
1409    }
1410 
1411 #  undef unop
1412 #  undef binop
1413 #  undef mkU64
1414 #  undef mkU32
1415 #  undef mkU8
1416 
1417    return NULL;
1418 }
1419 
1420 
1421 /*---------------------------------------------------------------*/
1422 /*--- Supporting functions for x87 FPU activities.            ---*/
1423 /*---------------------------------------------------------------*/
1424 
1425 static inline Bool host_is_little_endian ( void )
1426 {
1427    UInt x = 0x76543210;
1428    UChar* p = (UChar*)(&x);
1429    return toBool(*p == 0x10);
1430 }
1431 
1432 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1433 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
1434 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1435 {
1436    Bool   mantissaIsZero;
1437    Int    bexp;
1438    UChar  sign;
1439    UChar* f64;
1440 
1441    vassert(host_is_little_endian());
1442 
1443    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1444 
1445    f64  = (UChar*)(&dbl);
1446    sign = toUChar( (f64[7] >> 7) & 1 );
1447 
1448    /* First off, if the tag indicates the register was empty,
1449       return 1,0,sign,1 */
1450    if (tag == 0) {
1451       /* vex_printf("Empty\n"); */
1452       return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1453                                    | AMD64G_FC_MASK_C0;
1454    }
1455 
1456    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1457    bexp &= 0x7FF;
1458 
1459    mantissaIsZero
1460       = toBool(
1461            (f64[6] & 0x0F) == 0
1462            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1463         );
1464 
1465    /* If both exponent and mantissa are zero, the value is zero.
1466       Return 1,0,sign,0. */
1467    if (bexp == 0 && mantissaIsZero) {
1468       /* vex_printf("Zero\n"); */
1469       return AMD64G_FC_MASK_C3 | 0
1470                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1471    }
1472 
1473    /* If exponent is zero but mantissa isn't, it's a denormal.
1474       Return 1,1,sign,0. */
1475    if (bexp == 0 && !mantissaIsZero) {
1476       /* vex_printf("Denormal\n"); */
1477       return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1478                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1479    }
1480 
1481    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1482       Return 0,1,sign,1. */
1483    if (bexp == 0x7FF && mantissaIsZero) {
1484       /* vex_printf("Inf\n"); */
1485       return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1486                                    | AMD64G_FC_MASK_C0;
1487    }
1488 
1489    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1490       Return 0,0,sign,1. */
1491    if (bexp == 0x7FF && !mantissaIsZero) {
1492       /* vex_printf("NaN\n"); */
1493       return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1494    }
1495 
1496    /* Uh, ok, we give up.  It must be a normal finite number.
1497       Return 0,1,sign,0.
1498    */
1499    /* vex_printf("normal\n"); */
1500    return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1501 }
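
/* Added worked example (illustrative commentary, not part of the
   original source): for a non-empty register holding 1.0, dbl is
   0x3FF0000000000000, so sign = 0, bexp = 0x3FF and the mantissa is
   zero.  None of the special cases above match, so the result is just
   AMD64G_FC_MASK_C2, i.e. C3=0 C2=1 C1=0 C0=0 -- the FXAM encoding for
   a normal finite number. */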
1502 
1503 
1504 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1505    appears to differ from the former only in that the 8 FP registers
1506    themselves are not transferred into the guest state. */
1507 static
1508 VexEmWarn do_put_x87 ( Bool moveRegs,
1509                        /*IN*/UChar* x87_state,
1510                        /*OUT*/VexGuestAMD64State* vex_state )
1511 {
1512    Int        stno, preg;
1513    UInt       tag;
1514    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1515    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1516    Fpu_State* x87     = (Fpu_State*)x87_state;
1517    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1518    UInt       tagw    = x87->env[FP_ENV_TAG];
1519    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1520    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1521    VexEmWarn  ew;
1522    UInt       fpround;
1523    ULong      pair;
1524 
1525    /* Copy registers and tags */
1526    for (stno = 0; stno < 8; stno++) {
1527       preg = (stno + ftop) & 7;
1528       tag = (tagw >> (2*preg)) & 3;
1529       if (tag == 3) {
1530          /* register is empty */
1531          /* hmm, if it's empty, does it still get written?  Probably
1532             safer to say it does.  If we don't, memcheck could get out
1533             of sync, in that it thinks all FP registers are defined by
1534             this helper, but in reality some have not been updated. */
1535          if (moveRegs)
1536             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1537          vexTags[preg] = 0;
1538       } else {
1539          /* register is non-empty */
1540          if (moveRegs)
1541             convert_f80le_to_f64le( &x87->reg[10*stno],
1542                                     (UChar*)&vexRegs[preg] );
1543          vexTags[preg] = 1;
1544       }
1545    }
1546 
1547    /* stack pointer */
1548    vex_state->guest_FTOP = ftop;
1549 
1550    /* status word */
1551    vex_state->guest_FC3210 = c3210;
1552 
1553    /* handle the control word, setting FPROUND and detecting any
1554       emulation warnings. */
1555    pair    = amd64g_check_fldcw ( (ULong)fpucw );
1556    fpround = (UInt)pair;
1557    ew      = (VexEmWarn)(pair >> 32);
1558 
1559    vex_state->guest_FPROUND = fpround & 3;
1560 
1561    /* emulation warnings --> caller */
1562    return ew;
1563 }
1564 
1565 
1566 /* Create an x87 FPU state from the guest state, as close as
1567    we can approximate it. */
1568 static
1569 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1570                   /*OUT*/UChar* x87_state )
1571 {
1572    Int        i, stno, preg;
1573    UInt       tagw;
1574    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1575    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1576    Fpu_State* x87     = (Fpu_State*)x87_state;
1577    UInt       ftop    = vex_state->guest_FTOP;
1578    UInt       c3210   = vex_state->guest_FC3210;
1579 
1580    for (i = 0; i < 14; i++)
1581       x87->env[i] = 0;
1582 
1583    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1584    x87->env[FP_ENV_STAT]
1585       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1586    x87->env[FP_ENV_CTRL]
1587       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1588 
1589    /* Dump the register stack in ST order. */
1590    tagw = 0;
1591    for (stno = 0; stno < 8; stno++) {
1592       preg = (stno + ftop) & 7;
1593       if (vexTags[preg] == 0) {
1594          /* register is empty */
1595          tagw |= (3 << (2*preg));
1596          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1597                                  &x87->reg[10*stno] );
1598       } else {
1599          /* register is full. */
1600          tagw |= (0 << (2*preg));
1601          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1602                                  &x87->reg[10*stno] );
1603       }
1604    }
1605    x87->env[FP_ENV_TAG] = toUShort(tagw);
1606 }
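
/* Added note (not part of the original source): the x87 tag word packs
   2 bits per physical register, with 0 meaning "valid" and 3 meaning
   "empty"; the intermediate encodings (1 = zero, 2 = special) are
   never produced here, so the image created by do_get_x87 is only a
   conservative approximation of what real hardware would write. */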
1607 
1608 
1609 /* CALLED FROM GENERATED CODE */
1610 /* DIRTY HELPER (reads guest state, writes guest mem) */
1611 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
1612 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
1613 {
1614    /* Derived from values obtained from
1615       vendor_id       : AuthenticAMD
1616       cpu family      : 15
1617       model           : 12
1618       model name      : AMD Athlon(tm) 64 Processor 3200+
1619       stepping        : 0
1620       cpu MHz         : 2200.000
1621       cache size      : 512 KB
1622    */
1623    /* Somewhat roundabout, but at least it's simple. */
1624    Fpu_State tmp;
1625    UShort*   addrS = (UShort*)addr;
1626    UChar*    addrC = (UChar*)addr;
1627    U128*     xmm   = (U128*)(addr + 160);
1628    UInt      mxcsr;
1629    UShort    fp_tags;
1630    UInt      summary_tags;
1631    Int       r, stno;
1632    UShort    *srcS, *dstS;
1633 
1634    do_get_x87( gst, (UChar*)&tmp );
1635    mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
1636 
1637    /* Now build the proper fxsave image from the x87 image we just
1638       made. */
1639 
1640    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1641    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1642 
1643    /* set addrS[2] in an endian-independent way */
1644    summary_tags = 0;
1645    fp_tags = tmp.env[FP_ENV_TAG];
1646    for (r = 0; r < 8; r++) {
1647       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1648          summary_tags |= (1 << r);
1649    }
1650    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1651    addrC[5]  = 0; /* pad */
1652 
1653    /* FOP: faulting fpu opcode.  From experimentation, the real CPU
1654       does not write this field. (?!) */
1655    addrS[3]  = 0; /* BOGUS */
1656 
1657    /* RIP (Last x87 instruction pointer).  From experimentation, the
1658       real CPU does not write this field. (?!) */
1659    addrS[4]  = 0; /* BOGUS */
1660    addrS[5]  = 0; /* BOGUS */
1661    addrS[6]  = 0; /* BOGUS */
1662    addrS[7]  = 0; /* BOGUS */
1663 
1664    /* RDP (Last x87 data pointer).  From experimentation, the real CPU
1665       does not write this field. (?!) */
1666    addrS[8]  = 0; /* BOGUS */
1667    addrS[9]  = 0; /* BOGUS */
1668    addrS[10] = 0; /* BOGUS */
1669    addrS[11] = 0; /* BOGUS */
1670 
1671    addrS[12] = toUShort(mxcsr);  /* MXCSR */
1672    addrS[13] = toUShort(mxcsr >> 16);
1673 
1674    addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
1675    addrS[15] = 0x0000; /* MXCSR mask (hi16) */
1676 
1677    /* Copy in the FP registers, in ST order. */
1678    for (stno = 0; stno < 8; stno++) {
1679       srcS = (UShort*)(&tmp.reg[10*stno]);
1680       dstS = (UShort*)(&addrS[16 + 8*stno]);
1681       dstS[0] = srcS[0];
1682       dstS[1] = srcS[1];
1683       dstS[2] = srcS[2];
1684       dstS[3] = srcS[3];
1685       dstS[4] = srcS[4];
1686       dstS[5] = 0;
1687       dstS[6] = 0;
1688       dstS[7] = 0;
1689    }
1690 
1691    /* That's the first 160 bytes of the image done.  Now only %xmm0
1692       .. %xmm15 remain to be copied.  If the host is big-endian, these
1693       need to be byte-swapped. */
1694    vassert(host_is_little_endian());
1695 
1696 #  define COPY_U128(_dst,_src)                       \
1697       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1698            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1699       while (0)
1700 
1701    COPY_U128( xmm[0],  gst->guest_XMM0 );
1702    COPY_U128( xmm[1],  gst->guest_XMM1 );
1703    COPY_U128( xmm[2],  gst->guest_XMM2 );
1704    COPY_U128( xmm[3],  gst->guest_XMM3 );
1705    COPY_U128( xmm[4],  gst->guest_XMM4 );
1706    COPY_U128( xmm[5],  gst->guest_XMM5 );
1707    COPY_U128( xmm[6],  gst->guest_XMM6 );
1708    COPY_U128( xmm[7],  gst->guest_XMM7 );
1709    COPY_U128( xmm[8],  gst->guest_XMM8 );
1710    COPY_U128( xmm[9],  gst->guest_XMM9 );
1711    COPY_U128( xmm[10], gst->guest_XMM10 );
1712    COPY_U128( xmm[11], gst->guest_XMM11 );
1713    COPY_U128( xmm[12], gst->guest_XMM12 );
1714    COPY_U128( xmm[13], gst->guest_XMM13 );
1715    COPY_U128( xmm[14], gst->guest_XMM14 );
1716    COPY_U128( xmm[15], gst->guest_XMM15 );
1717 
1718 #  undef COPY_U128
1719 }
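
/* Added summary (not part of the original source): the image built
   above follows the usual 64-bit fxsave layout -- FCW/FSW in bytes
   0..3, the tag summary byte and FOP in bytes 4..7, the (unwritten)
   x87 instruction and data pointers in bytes 8..23, MXCSR and its mask
   in bytes 24..31, ST0..ST7 in 16-byte slots starting at byte 32, and
   XMM0..XMM15 in 16-byte slots starting at byte 160. */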
1720 
1721 
1722 /* CALLED FROM GENERATED CODE */
1723 /* DIRTY HELPER (writes guest state, reads guest mem) */
1724 VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
1725 {
1726    Fpu_State tmp;
1727    VexEmWarn warnX87 = EmWarn_NONE;
1728    VexEmWarn warnXMM = EmWarn_NONE;
1729    UShort*   addrS   = (UShort*)addr;
1730    UChar*    addrC   = (UChar*)addr;
1731    U128*     xmm     = (U128*)(addr + 160);
1732    UShort    fp_tags;
1733    Int       r, stno, i;
1734 
1735    /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
1736       to be byte-swapped. */
1737    vassert(host_is_little_endian());
1738 
1739 #  define COPY_U128(_dst,_src)                       \
1740       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1741            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1742       while (0)
1743 
1744    COPY_U128( gst->guest_XMM0, xmm[0] );
1745    COPY_U128( gst->guest_XMM1, xmm[1] );
1746    COPY_U128( gst->guest_XMM2, xmm[2] );
1747    COPY_U128( gst->guest_XMM3, xmm[3] );
1748    COPY_U128( gst->guest_XMM4, xmm[4] );
1749    COPY_U128( gst->guest_XMM5, xmm[5] );
1750    COPY_U128( gst->guest_XMM6, xmm[6] );
1751    COPY_U128( gst->guest_XMM7, xmm[7] );
1752    COPY_U128( gst->guest_XMM8, xmm[8] );
1753    COPY_U128( gst->guest_XMM9, xmm[9] );
1754    COPY_U128( gst->guest_XMM10, xmm[10] );
1755    COPY_U128( gst->guest_XMM11, xmm[11] );
1756    COPY_U128( gst->guest_XMM12, xmm[12] );
1757    COPY_U128( gst->guest_XMM13, xmm[13] );
1758    COPY_U128( gst->guest_XMM14, xmm[14] );
1759    COPY_U128( gst->guest_XMM15, xmm[15] );
1760 
1761 #  undef COPY_U128
1762 
1763    /* Copy the x87 registers out of the image, into a temporary
1764       Fpu_State struct. */
1765    for (i = 0; i < 14; i++) tmp.env[i] = 0;
1766    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1767    /* fill in tmp.reg[0..7] */
1768    for (stno = 0; stno < 8; stno++) {
1769       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1770       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1771       dstS[0] = srcS[0];
1772       dstS[1] = srcS[1];
1773       dstS[2] = srcS[2];
1774       dstS[3] = srcS[3];
1775       dstS[4] = srcS[4];
1776    }
1777    /* fill in tmp.env[0..13] */
1778    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1779    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1780 
1781    fp_tags = 0;
1782    for (r = 0; r < 8; r++) {
1783       if (addrC[4] & (1<<r))
1784          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1785       else
1786          fp_tags |= (3 << (2*r)); /* EMPTY */
1787    }
1788    tmp.env[FP_ENV_TAG] = fp_tags;
1789 
1790    /* Now write 'tmp' into the guest state. */
1791    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1792 
1793    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1794                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1795      ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
1796 
1797      warnXMM = (VexEmWarn)(w64 >> 32);
1798 
1799      gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
1800    }
1801 
1802    /* Prefer an X87 emwarn over an XMM one, if both exist. */
1803    if (warnX87 != EmWarn_NONE)
1804       return warnX87;
1805    else
1806       return warnXMM;
1807 }
1808 
1809 
1810 /* DIRTY HELPER (writes guest state) */
1811 /* Initialise the x87 FPU state as per 'finit'. */
1812 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
1813 {
1814    Int i;
1815    gst->guest_FTOP = 0;
1816    for (i = 0; i < 8; i++) {
1817       gst->guest_FPTAG[i] = 0; /* empty */
1818       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1819    }
1820    gst->guest_FPROUND = (ULong)Irrm_NEAREST;
1821    gst->guest_FC3210  = 0;
1822 }
1823 
1824 
1825 /* CALLED FROM GENERATED CODE */
1826 /* DIRTY HELPER (reads guest memory) */
1827 ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
1828 {
1829    ULong f64;
1830    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
1831    return f64;
1832 }
1833 
1834 /* CALLED FROM GENERATED CODE */
1835 /* DIRTY HELPER (writes guest memory) */
1836 void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
1837 {
1838    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
1839 }
1840 
1841 
1842 /* CALLED FROM GENERATED CODE */
1843 /* CLEAN HELPER */
1844 /* mxcsr[15:0] contains a SSE native format MXCSR value.
1845    Extract from it the required SSEROUND value and any resulting
1846    emulation warning, and return (warn << 32) | sseround value.
1847 */
1848 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
1849 {
1850    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1851    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1852    ULong rmode = (mxcsr >> 13) & 3;
1853 
1854    /* Detect any required emulation warnings. */
1855    VexEmWarn ew = EmWarn_NONE;
1856 
1857    if ((mxcsr & 0x1F80) != 0x1F80) {
1858       /* unmasked exceptions! */
1859       ew = EmWarn_X86_sseExns;
1860    }
1861    else
1862    if (mxcsr & (1<<15)) {
1863       /* FZ is set */
1864       ew = EmWarn_X86_fz;
1865    }
1866    else
1867    if (mxcsr & (1<<6)) {
1868       /* DAZ is set */
1869       ew = EmWarn_X86_daz;
1870    }
1871 
1872    return (((ULong)ew) << 32) | ((ULong)rmode);
1873 }
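
/* Added worked example (not part of the original source): a power-on
   MXCSR of 0x1F80 has all exceptions masked, FZ and DAZ clear, and
   bits 14:13 equal to 0, so this helper returns
   (EmWarn_NONE << 32) | 0, i.e. round to nearest with no warning.
   A value of 0x5F80 differs only in bits 14:13 (= 2), giving round
   towards +infinity, again with no warning. */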
1874 
1875 
1876 /* CALLED FROM GENERATED CODE */
1877 /* CLEAN HELPER */
1878 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1879    native format MXCSR value. */
1880 ULong amd64g_create_mxcsr ( ULong sseround )
1881 {
1882    sseround &= 3;
1883    return 0x1F80 | (sseround << 13);
1884 }
1885 
1886 
1887 /* CLEAN HELPER */
1888 /* fpucw[15:0] contains a x87 native format FPU control word.
1889    Extract from it the required FPROUND value and any resulting
1890    emulation warning, and return (warn << 32) | fpround value.
1891 */
1892 ULong amd64g_check_fldcw ( ULong fpucw )
1893 {
1894    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
1895    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1896    ULong rmode = (fpucw >> 10) & 3;
1897 
1898    /* Detect any required emulation warnings. */
1899    VexEmWarn ew = EmWarn_NONE;
1900 
1901    if ((fpucw & 0x3F) != 0x3F) {
1902       /* unmasked exceptions! */
1903       ew = EmWarn_X86_x87exns;
1904    }
1905    else
1906    if (((fpucw >> 8) & 3) != 3) {
1907       /* unsupported precision */
1908       ew = EmWarn_X86_x87precision;
1909    }
1910 
1911    return (((ULong)ew) << 32) | ((ULong)rmode);
1912 }
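
/* Added worked example (not part of the original source): the control
   word 0x037F produced by amd64g_create_fpucw(Irrm_NEAREST) below has
   all exceptions masked, 64-bit precision (bits 9:8 == 3) and rounding
   bits 11:10 == 0, so this helper hands back (EmWarn_NONE << 32) | 0,
   i.e. round to nearest and no emulation warning. */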
1913 
1914 
1915 /* CLEAN HELPER */
1916 /* Given fpround as an IRRoundingMode value, create a suitable x87
1917    native format FPU control word. */
1918 ULong amd64g_create_fpucw ( ULong fpround )
1919 {
1920    fpround &= 3;
1921    return 0x037F | (fpround << 10);
1922 }
1923 
1924 
1925 /* This is used to implement 'fldenv'.
1926    Reads 28 bytes at x87_state[0 .. 27]. */
1927 /* CALLED FROM GENERATED CODE */
1928 /* DIRTY HELPER */
1929 VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
1930                                       /*IN*/HWord x87_state)
1931 {
1932    Int        stno, preg;
1933    UInt       tag;
1934    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1935    Fpu_State* x87     = (Fpu_State*)x87_state;
1936    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1937    UInt       tagw    = x87->env[FP_ENV_TAG];
1938    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1939    ULong      c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1940    VexEmWarn  ew;
1941    ULong      fpround;
1942    ULong      pair;
1943 
1944    /* Copy tags */
1945    for (stno = 0; stno < 8; stno++) {
1946       preg = (stno + ftop) & 7;
1947       tag = (tagw >> (2*preg)) & 3;
1948       if (tag == 3) {
1949          /* register is empty */
1950          vexTags[preg] = 0;
1951       } else {
1952          /* register is non-empty */
1953          vexTags[preg] = 1;
1954       }
1955    }
1956 
1957    /* stack pointer */
1958    vex_state->guest_FTOP = ftop;
1959 
1960    /* status word */
1961    vex_state->guest_FC3210 = c3210;
1962 
1963    /* handle the control word, setting FPROUND and detecting any
1964       emulation warnings. */
1965    pair    = amd64g_check_fldcw ( (ULong)fpucw );
1966    fpround = pair & 0xFFFFFFFFULL;
1967    ew      = (VexEmWarn)(pair >> 32);
1968 
1969    vex_state->guest_FPROUND = fpround & 3;
1970 
1971    /* emulation warnings --> caller */
1972    return ew;
1973 }
1974 
1975 
1976 /* CALLED FROM GENERATED CODE */
1977 /* DIRTY HELPER */
1978 /* Create an x87 FPU env from the guest state, as close as we can
1979    approximate it.  Writes 28 bytes at x87_state[0..27]. */
1980 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
1981                                  /*OUT*/HWord x87_state )
1982 {
1983    Int        i, stno, preg;
1984    UInt       tagw;
1985    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1986    Fpu_State* x87     = (Fpu_State*)x87_state;
1987    UInt       ftop    = vex_state->guest_FTOP;
1988    ULong      c3210   = vex_state->guest_FC3210;
1989 
1990    for (i = 0; i < 14; i++)
1991       x87->env[i] = 0;
1992 
1993    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1994    x87->env[FP_ENV_STAT]
1995       = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
1996    x87->env[FP_ENV_CTRL]
1997       = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
1998 
1999    /* Compute the x87 tag word. */
2000    tagw = 0;
2001    for (stno = 0; stno < 8; stno++) {
2002       preg = (stno + ftop) & 7;
2003       if (vexTags[preg] == 0) {
2004          /* register is empty */
2005          tagw |= (3 << (2*preg));
2006       } else {
2007          /* register is full. */
2008          tagw |= (0 << (2*preg));
2009       }
2010    }
2011    x87->env[FP_ENV_TAG] = toUShort(tagw);
2012 
2013    /* We don't dump the x87 registers, tho. */
2014 }
2015 
2016 
2017 /*---------------------------------------------------------------*/
2018 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
2019 /*---------------------------------------------------------------*/
2020 
2021 /* Claim to be the following CPU, which is probably representative of
2022    the lowliest (earliest) amd64 offerings.  It can do neither sse3
2023    nor cx16.
2024 
2025    vendor_id       : AuthenticAMD
2026    cpu family      : 15
2027    model           : 5
2028    model name      : AMD Opteron (tm) Processor 848
2029    stepping        : 10
2030    cpu MHz         : 1797.682
2031    cache size      : 1024 KB
2032    fpu             : yes
2033    fpu_exception   : yes
2034    cpuid level     : 1
2035    wp              : yes
2036    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2037                      mtrr pge mca cmov pat pse36 clflush mmx fxsr
2038                      sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2039    bogomips        : 3600.62
2040    TLB size        : 1088 4K pages
2041    clflush size    : 64
2042    cache_alignment : 64
2043    address sizes   : 40 bits physical, 48 bits virtual
2044    power management: ts fid vid ttp
2045 */
2046 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2047 {
2048 #  define SET_ABCD(_a,_b,_c,_d)                \
2049       do { st->guest_RAX = (ULong)(_a);        \
2050            st->guest_RBX = (ULong)(_b);        \
2051            st->guest_RCX = (ULong)(_c);        \
2052            st->guest_RDX = (ULong)(_d);        \
2053       } while (0)
2054 
2055    switch (0xFFFFFFFF & st->guest_RAX) {
2056       case 0x00000000:
2057          SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2058          break;
2059       case 0x00000001:
2060          SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2061          break;
2062       case 0x80000000:
2063          SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2064          break;
2065       case 0x80000001:
2066          SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
2067          break;
2068       case 0x80000002:
2069          SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2070          break;
2071       case 0x80000003:
2072          SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2073          break;
2074       case 0x80000004:
2075          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2076          break;
2077       case 0x80000005:
2078          SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2079          break;
2080       case 0x80000006:
2081          SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2082          break;
2083       case 0x80000007:
2084          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2085          break;
2086       case 0x80000008:
2087          SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2088          break;
2089       default:
2090          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2091          break;
2092    }
2093 #  undef SET_ABCD
2094 }
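
/* Added note (not part of the original source): for leaf 0 the code
   above returns the maximum basic leaf (1) in RAX and the bytes of
   "AuthenticAMD" spread, in the usual EBX/EDX/ECX order, across the
   other three registers (0x68747541 = "Auth", 0x69746e65 = "enti",
   0x444d4163 = "cAMD"). */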
2095 
2096 
2097 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2098    capable.
2099 
2100    vendor_id       : GenuineIntel
2101    cpu family      : 6
2102    model           : 15
2103    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2104    stepping        : 6
2105    cpu MHz         : 2394.000
2106    cache size      : 4096 KB
2107    physical id     : 0
2108    siblings        : 2
2109    core id         : 0
2110    cpu cores       : 2
2111    fpu             : yes
2112    fpu_exception   : yes
2113    cpuid level     : 10
2114    wp              : yes
2115    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2116                      mtrr pge mca cmov pat pse36 clflush dts acpi
2117                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2118                      constant_tsc pni monitor ds_cpl vmx est tm2
2119                      cx16 xtpr lahf_lm
2120    bogomips        : 4798.78
2121    clflush size    : 64
2122    cache_alignment : 64
2123    address sizes   : 36 bits physical, 48 bits virtual
2124    power management:
2125 */
2126 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2127 {
2128 #  define SET_ABCD(_a,_b,_c,_d)                \
2129       do { st->guest_RAX = (ULong)(_a);        \
2130            st->guest_RBX = (ULong)(_b);        \
2131            st->guest_RCX = (ULong)(_c);        \
2132            st->guest_RDX = (ULong)(_d);        \
2133       } while (0)
2134 
2135    switch (0xFFFFFFFF & st->guest_RAX) {
2136       case 0x00000000:
2137          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2138          break;
2139       case 0x00000001:
2140          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2141          break;
2142       case 0x00000002:
2143          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2144          break;
2145       case 0x00000003:
2146          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2147          break;
2148       case 0x00000004: {
2149          switch (0xFFFFFFFF & st->guest_RCX) {
2150             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2151                                       0x0000003f, 0x00000001); break;
2152             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2153                                       0x0000003f, 0x00000001); break;
2154             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2155                                       0x00000fff, 0x00000001); break;
2156             default:         SET_ABCD(0x00000000, 0x00000000,
2157                                       0x00000000, 0x00000000); break;
2158          }
2159          break;
2160       }
2161       case 0x00000005:
2162          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2163          break;
2164       case 0x00000006:
2165          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2166          break;
2167       case 0x00000007:
2168          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2169          break;
2170       case 0x00000008:
2171          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2172          break;
2173       case 0x00000009:
2174          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2175          break;
2176       case 0x0000000a:
2177       unhandled_eax_value:
2178          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2179          break;
2180       case 0x80000000:
2181          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2182          break;
2183       case 0x80000001:
2184          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2185          break;
2186       case 0x80000002:
2187          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2188          break;
2189       case 0x80000003:
2190          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2191          break;
2192       case 0x80000004:
2193          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2194          break;
2195       case 0x80000005:
2196          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2197          break;
2198       case 0x80000006:
2199          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2200          break;
2201       case 0x80000007:
2202          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2203          break;
2204       case 0x80000008:
2205          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2206          break;
2207       default:
2208          goto unhandled_eax_value;
2209    }
2210 #  undef SET_ABCD
2211 }
2212 
2213 
2214 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2215    capable.
2216 
2217    vendor_id       : GenuineIntel
2218    cpu family      : 6
2219    model           : 37
2220    model name      : Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz
2221    stepping        : 2
2222    cpu MHz         : 3334.000
2223    cache size      : 4096 KB
2224    physical id     : 0
2225    siblings        : 4
2226    core id         : 0
2227    cpu cores       : 2
2228    apicid          : 0
2229    initial apicid  : 0
2230    fpu             : yes
2231    fpu_exception   : yes
2232    cpuid level     : 11
2233    wp              : yes
2234    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2235                      mtrr pge mca cmov pat pse36 clflush dts acpi
2236                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2237                      lm constant_tsc arch_perfmon pebs bts rep_good
2238                      xtopology nonstop_tsc aperfmperf pni pclmulqdq
2239                      dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2240                      xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2241                      arat tpr_shadow vnmi flexpriority ept vpid
2242                      MINUS aes (see below)
2243    bogomips        : 6957.57
2244    clflush size    : 64
2245    cache_alignment : 64
2246    address sizes   : 36 bits physical, 48 bits virtual
2247    power management:
2248 */
2249 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2250 {
2251 #  define SET_ABCD(_a,_b,_c,_d)                \
2252       do { st->guest_RAX = (ULong)(_a);        \
2253            st->guest_RBX = (ULong)(_b);        \
2254            st->guest_RCX = (ULong)(_c);        \
2255            st->guest_RDX = (ULong)(_d);        \
2256       } while (0)
2257 
2258    UInt old_eax = (UInt)st->guest_RAX;
2259    UInt old_ecx = (UInt)st->guest_RCX;
2260 
2261    switch (old_eax) {
2262       case 0x00000000:
2263          SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2264          break;
2265       case 0x00000001:
2266          // & ~(1<<25): don't claim to support AES insns.  See
2267          // bug 249991.
2268          SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
2269                                           0xbfebfbff);
2270          break;
2271       case 0x00000002:
2272          SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2273          break;
2274       case 0x00000003:
2275          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2276          break;
2277       case 0x00000004:
2278          switch (old_ecx) {
2279             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2280                                       0x0000003f, 0x00000000); break;
2281             case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2282                                       0x0000007f, 0x00000000); break;
2283             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2284                                       0x000001ff, 0x00000000); break;
2285             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2286                                       0x00000fff, 0x00000002); break;
2287             default:         SET_ABCD(0x00000000, 0x00000000,
2288                                       0x00000000, 0x00000000); break;
2289          }
2290          break;
2291       case 0x00000005:
2292          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2293          break;
2294       case 0x00000006:
2295          SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2296          break;
2297       case 0x00000007:
2298          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2299          break;
2300       case 0x00000008:
2301          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2302          break;
2303       case 0x00000009:
2304          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2305          break;
2306       case 0x0000000a:
2307          SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2308          break;
2309       case 0x0000000b:
2310          switch (old_ecx) {
2311             case 0x00000000:
2312                SET_ABCD(0x00000001, 0x00000002,
2313                         0x00000100, 0x00000000); break;
2314             case 0x00000001:
2315                SET_ABCD(0x00000004, 0x00000004,
2316                         0x00000201, 0x00000000); break;
2317             default:
2318                SET_ABCD(0x00000000, 0x00000000,
2319                         old_ecx,    0x00000000); break;
2320          }
2321          break;
2322       case 0x0000000c:
2323          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2324          break;
2325       case 0x0000000d:
2326          switch (old_ecx) {
2327             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2328                                       0x00000100, 0x00000000); break;
2329             case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2330                                       0x00000201, 0x00000000); break;
2331             default:         SET_ABCD(0x00000000, 0x00000000,
2332                                       old_ecx,    0x00000000); break;
2333          }
2334          break;
2335       case 0x80000000:
2336          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2337          break;
2338       case 0x80000001:
2339          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2340          break;
2341       case 0x80000002:
2342          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2343          break;
2344       case 0x80000003:
2345          SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2346          break;
2347       case 0x80000004:
2348          SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2349          break;
2350       case 0x80000005:
2351          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2352          break;
2353       case 0x80000006:
2354          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2355          break;
2356       case 0x80000007:
2357          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2358          break;
2359       case 0x80000008:
2360          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2361          break;
2362       default:
2363          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2364          break;
2365    }
2366 #  undef SET_ABCD
2367 }
2368 
2369 
2370 ULong amd64g_calculate_RCR ( ULong arg,
2371                              ULong rot_amt,
2372                              ULong rflags_in,
2373                              Long  szIN )
2374 {
2375    Bool  wantRflags = toBool(szIN < 0);
2376    ULong sz         = wantRflags ? (-szIN) : szIN;
2377    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2378    ULong cf=0, of=0, tempcf;
2379 
2380    switch (sz) {
2381       case 8:
2382          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2383          of        = ((arg >> 63) ^ cf) & 1;
2384          while (tempCOUNT > 0) {
2385             tempcf = arg & 1;
2386             arg    = (arg >> 1) | (cf << 63);
2387             cf     = tempcf;
2388             tempCOUNT--;
2389          }
2390          break;
2391       case 4:
2392          while (tempCOUNT >= 33) tempCOUNT -= 33;
2393          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2394          of        = ((arg >> 31) ^ cf) & 1;
2395          while (tempCOUNT > 0) {
2396             tempcf = arg & 1;
2397             arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
2398             cf     = tempcf;
2399             tempCOUNT--;
2400          }
2401          break;
2402       case 2:
2403          while (tempCOUNT >= 17) tempCOUNT -= 17;
2404          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2405          of        = ((arg >> 15) ^ cf) & 1;
2406          while (tempCOUNT > 0) {
2407             tempcf = arg & 1;
2408             arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
2409             cf     = tempcf;
2410             tempCOUNT--;
2411          }
2412          break;
2413       case 1:
2414          while (tempCOUNT >= 9) tempCOUNT -= 9;
2415          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2416          of        = ((arg >> 7) ^ cf) & 1;
2417          while (tempCOUNT > 0) {
2418             tempcf = arg & 1;
2419             arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
2420             cf     = tempcf;
2421             tempCOUNT--;
2422          }
2423          break;
2424       default:
2425          vpanic("calculate_RCR(amd64g): invalid size");
2426    }
2427 
2428    cf &= 1;
2429    of &= 1;
2430    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2431    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2432 
2433    /* caller can ask to have back either the resulting flags or
2434       resulting value, but not both */
2435    return wantRflags ? rflags_in : arg;
2436 }
2437 
2438 ULong amd64g_calculate_RCL ( ULong arg,
2439                              ULong rot_amt,
2440                              ULong rflags_in,
2441                              Long  szIN )
2442 {
2443    Bool  wantRflags = toBool(szIN < 0);
2444    ULong sz         = wantRflags ? (-szIN) : szIN;
2445    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2446    ULong cf=0, of=0, tempcf;
2447 
2448    switch (sz) {
2449       case 8:
2450          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2451          while (tempCOUNT > 0) {
2452             tempcf = (arg >> 63) & 1;
2453             arg    = (arg << 1) | (cf & 1);
2454             cf     = tempcf;
2455             tempCOUNT--;
2456          }
2457          of = ((arg >> 63) ^ cf) & 1;
2458          break;
2459       case 4:
2460          while (tempCOUNT >= 33) tempCOUNT -= 33;
2461          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2462          while (tempCOUNT > 0) {
2463             tempcf = (arg >> 31) & 1;
2464             arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
2465             cf     = tempcf;
2466             tempCOUNT--;
2467          }
2468          of = ((arg >> 31) ^ cf) & 1;
2469          break;
2470       case 2:
2471          while (tempCOUNT >= 17) tempCOUNT -= 17;
2472          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2473          while (tempCOUNT > 0) {
2474             tempcf = (arg >> 15) & 1;
2475             arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
2476             cf     = tempcf;
2477             tempCOUNT--;
2478          }
2479          of = ((arg >> 15) ^ cf) & 1;
2480          break;
2481       case 1:
2482          while (tempCOUNT >= 9) tempCOUNT -= 9;
2483          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2484          while (tempCOUNT > 0) {
2485             tempcf = (arg >> 7) & 1;
2486             arg    = 0xFFULL & ((arg << 1) | (cf & 1));
2487             cf     = tempcf;
2488             tempCOUNT--;
2489          }
2490          of = ((arg >> 7) ^ cf) & 1;
2491          break;
2492       default:
2493          vpanic("calculate_RCL(amd64g): invalid size");
2494    }
2495 
2496    cf &= 1;
2497    of &= 1;
2498    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2499    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2500 
2501    return wantRflags ? rflags_in : arg;
2502 }
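
/* Added worked example (not part of the original source): an 8-bit RCL
   of 0x81 by one position with CF initially 0 shifts the old top bit
   into CF and the old CF into bit 0, giving a value of 0x02 with
   CF = 1; OF is then (new top bit XOR new CF) = (0 ^ 1) = 1.  RCR
   above works the same way in the opposite direction. */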
2503 
2504 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
2505  * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
2506  */
2507 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
2508 {
2509     ULong hi, lo, tmp, A[16];
2510 
2511    A[0] = 0;            A[1] = a;
2512    A[2] = A[1] << 1;    A[3] = A[2] ^ a;
2513    A[4] = A[2] << 1;    A[5] = A[4] ^ a;
2514    A[6] = A[3] << 1;    A[7] = A[6] ^ a;
2515    A[8] = A[4] << 1;    A[9] = A[8] ^ a;
2516    A[10] = A[5] << 1;   A[11] = A[10] ^ a;
2517    A[12] = A[6] << 1;   A[13] = A[12] ^ a;
2518    A[14] = A[7] << 1;   A[15] = A[14] ^ a;
2519 
2520    lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
2521    hi = lo >> 56;
2522    lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
2523    hi = (hi << 8) | (lo >> 56);
2524    lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
2525    hi = (hi << 8) | (lo >> 56);
2526    lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
2527    hi = (hi << 8) | (lo >> 56);
2528    lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
2529    hi = (hi << 8) | (lo >> 56);
2530    lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
2531    hi = (hi << 8) | (lo >> 56);
2532    lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
2533    hi = (hi << 8) | (lo >> 56);
2534    lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
2535 
2536    ULong m0 = -1;
2537    m0 /= 255;
2538    tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
2539    tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
2540    tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
2541    tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
2542    tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
2543    tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
2544    tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
2545 
2546    return which ? hi : lo;
2547 }
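
/* Added cross-check sketch (not part of the original source): a naive
   bit-at-a-time carry-less multiply.  Under the same interpretation of
   'which' as above, ref_pclmul(a, b, 0) gives the low 64 bits and
   ref_pclmul(a, b, 1) the high 64 bits of the 128-bit GF(2) product,
   so it can be used to sanity-check the table-driven code above. */
#if 0
static ULong ref_pclmul ( ULong a, ULong b, ULong which )
{
   ULong hi = 0, lo = 0;
   Int   i;
   for (i = 0; i < 64; i++) {
      if ((b >> i) & 1) {
         lo ^= a << i;                 /* low half of (a << i) */
         if (i != 0)
            hi ^= a >> (64 - i);       /* bits shifted past bit 63 */
      }
   }
   return which ? hi : lo;
}
#endif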
2548 
2549 
2550 /* CALLED FROM GENERATED CODE */
2551 /* DIRTY HELPER (non-referentially-transparent) */
2552 /* Horrible hack.  On non-amd64 platforms, return 1. */
2553 ULong amd64g_dirtyhelper_RDTSC ( void )
2554 {
2555 #  if defined(__x86_64__)
2556    UInt  eax, edx;
2557    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
2558    return (((ULong)edx) << 32) | ((ULong)eax);
2559 #  else
2560    return 1ULL;
2561 #  endif
2562 }
2563 
2564 
2565 /* CALLED FROM GENERATED CODE */
2566 /* DIRTY HELPER (non-referentially-transparent) */
2567 /* Horrible hack.  On non-amd64 platforms, return 0. */
2568 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
2569 {
2570 #  if defined(__x86_64__)
2571    ULong r = 0;
2572    portno &= 0xFFFF;
2573    switch (sz) {
2574       case 4:
2575          __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
2576                               : "=a" (r) : "Nd" (portno));
2577          break;
2578       case 2:
2579          __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
2580                               : "=a" (r) : "Nd" (portno));
2581          break;
2582       case 1:
2583          __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
2584                               : "=a" (r) : "Nd" (portno));
2585          break;
2586       default:
2587          break; /* note: no 64-bit version of insn exists */
2588    }
2589    return r;
2590 #  else
2591    return 0;
2592 #  endif
2593 }
2594 
2595 
2596 /* CALLED FROM GENERATED CODE */
2597 /* DIRTY HELPER (non-referentially-transparent) */
2598 /* Horrible hack.  On non-amd64 platforms, do nothing. */
2599 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
2600 {
2601 #  if defined(__x86_64__)
2602    portno &= 0xFFFF;
2603    switch (sz) {
2604       case 4:
2605          __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
2606                               : : "a" (data), "Nd" (portno));
2607          break;
2608       case 2:
2609          __asm__ __volatile__("outw %w0, %w1"
2610                               : : "a" (data), "Nd" (portno));
2611          break;
2612       case 1:
2613          __asm__ __volatile__("outb %b0, %w1"
2614                               : : "a" (data), "Nd" (portno));
2615          break;
2616       default:
2617          break; /* note: no 64-bit version of insn exists */
2618    }
2619 #  else
2620    /* do nothing */
2621 #  endif
2622 }
2623 
2624 /* CALLED FROM GENERATED CODE */
2625 /* DIRTY HELPER (non-referentially-transparent) */
2626 /* Horrible hack.  On non-amd64 platforms, do nothing. */
2627 /* op = 0: call the native SGDT instruction.
2628    op = 1: call the native SIDT instruction.
2629 */
2630 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
2631 #  if defined(__x86_64__)
2632    switch (op) {
2633       case 0:
2634          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
2635          break;
2636       case 1:
2637          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
2638          break;
2639       default:
2640          vpanic("amd64g_dirtyhelper_SxDT");
2641    }
2642 #  else
2643    /* do nothing */
2644    UChar* p = (UChar*)address;
2645    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
2646    p[6] = p[7] = p[8] = p[9] = 0;
2647 #  endif
2648 }
2649 
2650 /*---------------------------------------------------------------*/
2651 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
2652 /*---------------------------------------------------------------*/
2653 
2654 static inline UChar abdU8 ( UChar xx, UChar yy ) {
2655    return toUChar(xx>yy ? xx-yy : yy-xx);
2656 }
2657 
2658 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
2659    return (((ULong)w1) << 32) | ((ULong)w0);
2660 }
2661 
2662 static inline UShort sel16x4_3 ( ULong w64 ) {
2663    UInt hi32 = toUInt(w64 >> 32);
2664    return toUShort(hi32 >> 16);
2665 }
2666 static inline UShort sel16x4_2 ( ULong w64 ) {
2667    UInt hi32 = toUInt(w64 >> 32);
2668    return toUShort(hi32);
2669 }
2670 static inline UShort sel16x4_1 ( ULong w64 ) {
2671    UInt lo32 = toUInt(w64);
2672    return toUShort(lo32 >> 16);
2673 }
2674 static inline UShort sel16x4_0 ( ULong w64 ) {
2675    UInt lo32 = toUInt(w64);
2676    return toUShort(lo32);
2677 }
2678 
2679 static inline UChar sel8x8_7 ( ULong w64 ) {
2680    UInt hi32 = toUInt(w64 >> 32);
2681    return toUChar(hi32 >> 24);
2682 }
2683 static inline UChar sel8x8_6 ( ULong w64 ) {
2684    UInt hi32 = toUInt(w64 >> 32);
2685    return toUChar(hi32 >> 16);
2686 }
2687 static inline UChar sel8x8_5 ( ULong w64 ) {
2688    UInt hi32 = toUInt(w64 >> 32);
2689    return toUChar(hi32 >> 8);
2690 }
2691 static inline UChar sel8x8_4 ( ULong w64 ) {
2692    UInt hi32 = toUInt(w64 >> 32);
2693    return toUChar(hi32 >> 0);
2694 }
2695 static inline UChar sel8x8_3 ( ULong w64 ) {
2696    UInt lo32 = toUInt(w64);
2697    return toUChar(lo32 >> 24);
2698 }
2699 static inline UChar sel8x8_2 ( ULong w64 ) {
2700    UInt lo32 = toUInt(w64);
2701    return toUChar(lo32 >> 16);
2702 }
2703 static inline UChar sel8x8_1 ( ULong w64 ) {
2704    UInt lo32 = toUInt(w64);
2705    return toUChar(lo32 >> 8);
2706 }
2707 static inline UChar sel8x8_0 ( ULong w64 ) {
2708    UInt lo32 = toUInt(w64);
2709    return toUChar(lo32 >> 0);
2710 }
2711 
2712 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2713 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
2714 {
2715    return
2716       mk32x2(
2717          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
2718             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
2719          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
2720             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
2721       );
2722 }
2723 
2724 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2725 ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
2726 {
2727    ULong r = 0;
2728    if (xx & (1ULL << (64-1))) r |= (1<<7);
2729    if (xx & (1ULL << (56-1))) r |= (1<<6);
2730    if (xx & (1ULL << (48-1))) r |= (1<<5);
2731    if (xx & (1ULL << (40-1))) r |= (1<<4);
2732    if (xx & (1ULL << (32-1))) r |= (1<<3);
2733    if (xx & (1ULL << (24-1))) r |= (1<<2);
2734    if (xx & (1ULL << (16-1))) r |= (1<<1);
2735    if (xx & (1ULL << ( 8-1))) r |= (1<<0);
2736    return r;
2737 }
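
/* Added worked example (not part of the original source): each test
   above looks at the sign bit of one byte lane, so for
   xx = 0x8000000000000080 only lanes 7 and 0 have their top bits set
   and the result is 0x81. */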
2738 
2739 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2740 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
2741 {
2742    UInt t = 0;
2743    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
2744    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
2745    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
2746    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
2747    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
2748    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
2749    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
2750    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
2751    t &= 0xFFFF;
2752    return (ULong)t;
2753 }
2754 
2755 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2756 ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
2757 {
2758    ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
2759    ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
2760    return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
2761 }
2762 
2763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2764 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
2765 {
2766    UInt  i;
2767    ULong crc = (b & 0xFFULL) ^ crcIn;
2768    for (i = 0; i < 8; i++)
2769       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2770    return crc;
2771 }
2772 
2773 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2774 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
2775 {
2776    UInt  i;
2777    ULong crc = (w & 0xFFFFULL) ^ crcIn;
2778    for (i = 0; i < 16; i++)
2779       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2780    return crc;
2781 }
2782 
2783 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2784 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
2785 {
2786    UInt i;
2787    ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
2788    for (i = 0; i < 32; i++)
2789       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
2790    return crc;
2791 }
2792 
2793 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2794 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
2795 {
2796    ULong crc = amd64g_calc_crc32l(crcIn, q);
2797    return amd64g_calc_crc32l(crc, q >> 32);
2798 }
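
/* Added note (not part of the original source): these helpers use the
   CRC32-C (Castagnoli) polynomial of the SSE4.2 CRC32 instruction,
   processing input LSB first with the reflected constant 0x82f63b78;
   the 64-bit case simply chains two 32-bit steps.  A usage sketch,
   showing how a caller could fold a whole buffer through the
   byte-sized helper (no initial/final inversion, as per the insn): */
#if 0
static ULong ref_crc32c_buf ( ULong crc, const UChar* buf, Int len )
{
   Int i;
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b(crc, (ULong)buf[i]);
   return crc;
}
#endif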


/*---------------------------------------------------------------*/
/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
/*---------------------------------------------------------------*/

static UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      res |=  ((arg->w8[i] == 0) ? 1 : 0) << i;
   }
   return res;
}
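
/* For example (illustrative): if only byte lanes 3 and 9 of *arg are
   zero, the returned mask is (1 << 3) | (1 << 9) == 0x208. */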

/* Helps with PCMP{I,E}STR{I,M}.

   CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (But not really,
   actually it could be a clean helper, but for the fact that we can't
   pass by value 2 x V128 to a clean helper, nor have one returned.)
   Reads guest state, writes to guest state for the xSTRM cases, makes
   no memory accesses, and is otherwise a pure function.

   opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
   the callee knows which I/E and I/M variant it is dealing with and
   what the specific operation is.  The 4th byte of the opcode is in
   the range 0x60 to 0x63:
       istri  66 0F 3A 63
       istrm  66 0F 3A 62
       estri  66 0F 3A 61
       estrm  66 0F 3A 60

   gstOffL and gstOffR are the guest state offsets for the two XMM
   register inputs.  We never have to deal with the memory case since
   that is handled by pre-loading the relevant value into the fake
   XMM16 register.

   For ESTRx variants, edxIN and eaxIN hold the values of those two
   registers.

   In all cases, the bottom 16 bits of the result contain the new
   OSZACP %rflags value.  For xSTRI variants, bits[31:16] of the
   result hold the new %ecx value.  For xSTRM variants, the helper
   writes the result directly to the guest XMM0.

   Declarable side effects: in all cases, reads guest state at
   [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
   guest_XMM0.

   This helper is expected to be called only with opc4_and_imm
   combinations that have actually been validated, and will assert
   otherwise.  The front end should ensure we're only called with
   verified values.
*/
ULong amd64g_dirtyhelper_PCMPxSTRx (
          VexGuestAMD64State* gst,
          HWord opc4_and_imm,
          HWord gstOffL, HWord gstOffR,
          HWord edxIN, HWord eaxIN
       )
{
   HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
   HWord imm8 = opc4_and_imm & 0xFF;
   HWord isISTRx = opc4 & 2;
   HWord isxSTRM = (opc4 & 1) ^ 1;
   vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
   vassert((imm8 & 1) == 0); /* we support byte-size cases only */

   // where the args are
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );

   /* Create the arg validity masks, either from the vectors
      themselves or from the supplied edx/eax values. */
   // FIXME: this is only right for the 8-bit data cases.
   // At least that is asserted above.
   UInt zmaskL, zmaskR;
   if (isISTRx) {
      zmaskL = zmask_from_V128(argL);
      zmaskR = zmask_from_V128(argR);
   } else {
      Int tmp;
      tmp = edxIN & 0xFFFFFFFF;
      if (tmp < -16) tmp = -16;
      if (tmp > 16)  tmp = 16;
      if (tmp < 0)   tmp = -tmp;
      vassert(tmp >= 0 && tmp <= 16);
      zmaskL = (1 << tmp) & 0xFFFF;
      tmp = eaxIN & 0xFFFFFFFF;
      if (tmp < -16) tmp = -16;
      if (tmp > 16)  tmp = 16;
      if (tmp < 0)   tmp = -tmp;
      vassert(tmp >= 0 && tmp <= 16);
      zmaskR = (1 << tmp) & 0xFFFF;
   }

   // temp spot for the resulting flags and vector.
   V128 resV;
   UInt resOSZACP;

   // do the actual computation
   Bool ok = compute_PCMPxSTRx (
                &resV, &resOSZACP, argL, argR,
                zmaskL, zmaskR, imm8, (Bool)isxSTRM
             );

   // The front end shouldn't pass us any imm8 variants we can't
   // handle.  Hence:
   vassert(ok);

   // So, finally we need to get the results back to the caller.
   // In all cases, the new OSZACP value is the lowest 16 bits of
   // the return value.
   if (isxSTRM) {
      /* gst->guest_XMM0 = resV; */ // gcc doesn't like that
      gst->guest_XMM0[0] = resV.w32[0];
      gst->guest_XMM0[1] = resV.w32[1];
      gst->guest_XMM0[2] = resV.w32[2];
      gst->guest_XMM0[3] = resV.w32[3];
      return resOSZACP & 0x8D5; /* keep only the O,S,Z,A,C,P bits */
   } else {
      UInt newECX = resV.w32[0] & 0xFFFF;
      return (newECX << 16) | (resOSZACP & 0x8D5);
   }
}
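
/* Illustrative sketch of a call site (not part of VEX; the actual
   calls are constructed by the front end): packing opc4_and_imm for a
   pcmpistri (4th opcode byte 0x63) with a hypothetical byte-sized
   immediate of 0x0C, then unpacking the result into the new OSZACP
   flags (bits 15:0) and the new %ecx value (bits 31:16). */
#if 0
static void example_pcmpistri_call ( VexGuestAMD64State* gst,
                                     HWord gstOffL, HWord gstOffR )
{
   ULong res = amd64g_dirtyhelper_PCMPxSTRx(
                  gst, (0x63 << 8) | 0x0C, gstOffL, gstOffR,
                  0/*edxIN: unused for ISTRx*/, 0/*eaxIN: unused for ISTRx*/ );
   UInt newOSZACP = (UInt)(res & 0xFFFF);
   UInt newECX    = (UInt)((res >> 16) & 0xFFFF);
   (void)newOSZACP; (void)newECX;
}
#endif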


/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire amd64 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
   vex_state->guest_RAX = 0;
   vex_state->guest_RCX = 0;
   vex_state->guest_RDX = 0;
   vex_state->guest_RBX = 0;
   vex_state->guest_RSP = 0;
   vex_state->guest_RBP = 0;
   vex_state->guest_RSI = 0;
   vex_state->guest_RDI = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15 = 0;

   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;

   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;

   /* HACK: represent the offset associated with %fs==0. This
      assumes that %fs is only ever zero. */
   vex_state->guest_FS_ZERO = 0;

   vex_state->guest_RIP = 0;

   /* Initialise the simulated FPU */
   amd64g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0] = _xmm[1] = _xmm[2] = _xmm[3] = 0;

   vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);
   SSEZERO(vex_state->guest_XMM8);
   SSEZERO(vex_state->guest_XMM9);
   SSEZERO(vex_state->guest_XMM10);
   SSEZERO(vex_state->guest_XMM11);
   SSEZERO(vex_state->guest_XMM12);
   SSEZERO(vex_state->guest_XMM13);
   SSEZERO(vex_state->guest_XMM14);
   SSEZERO(vex_state->guest_XMM15);
   SSEZERO(vex_state->guest_XMM16);

#  undef SSEZERO

   vex_state->guest_EMWARN = EmWarn_NONE;

   /* These should not ever be either read or written, but we
      initialise them anyway. */
   vex_state->guest_TISTART = 0;
   vex_state->guest_TILEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_GS_0x60  = 0;

   vex_state->guest_IP_AT_SYSCALL = 0;
   /* vex_state->padding = 0; */
}
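
/* Illustrative sketch (not part of VEX): how a LibVEX client might use
   the initialiser above.  The stack pointer and entry point shown are
   made-up placeholder values. */
#if 0
static void example_client_setup ( VexGuestAMD64State* st )
{
   LibVEX_GuestAMD64_initialise(st);
   st->guest_RSP = 0x7FFF00000000ULL;   /* hypothetical initial stack pointer */
   st->guest_RIP = 0x400000ULL;         /* hypothetical entry point */
}
#endif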


/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %RSP, %RBP and %RIP
   only.  These are the minimum needed to extract correct stack
   backtraces from amd64 code.
*/
Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
                                                   Int maxoff)
{
   Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
   Int rbp_max = rbp_min + 8 - 1;
   Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
   Int rsp_max = rsp_min + 8 - 1;
   Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
   Int rip_max = rip_min + 8 - 1;

   if (maxoff < rbp_min || minoff > rbp_max) {
      /* no overlap with rbp */
   } else {
      return True;
   }

   if (maxoff < rsp_min || minoff > rsp_max) {
      /* no overlap with rsp */
   } else {
      return True;
   }

   if (maxoff < rip_min || minoff > rip_max) {
      /* no overlap with rip */
   } else {
      return True;
   }

   return False;
}
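
/* Illustrative queries against the policy above (not part of VEX): a
   range covering only guest_RAX overlaps none of %RSP/%RBP/%RIP and so
   does not require precise exceptions, whereas any range touching
   guest_RSP does. */
#if 0
static void example_precise_exn_queries ( void )
{
   Int raxOff = offsetof(VexGuestAMD64State, guest_RAX);
   Int rspOff = offsetof(VexGuestAMD64State, guest_RSP);
   vassert( !guest_amd64_state_requires_precise_mem_exns(raxOff, raxOff + 7) );
   vassert(  guest_amd64_state_requires_precise_mem_exns(rspOff, rspOff + 7) );
}
#endif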


#define ALWAYSDEFD(field)                             \
    { offsetof(VexGuestAMD64State, field),            \
      (sizeof ((VexGuestAMD64State*)0)->field) }

VexGuestLayout
   amd64guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestAMD64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
          .sizeof_SP = 8,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
          .sizeof_FP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 16,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_RIP),
                 /*  5 */ ALWAYSDEFD(guest_FS_ZERO),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 // /* */ ALWAYSDEFD(guest_CS),
                 // /* */ ALWAYSDEFD(guest_DS),
                 // /* */ ALWAYSDEFD(guest_ES),
                 // /* */ ALWAYSDEFD(guest_FS),
                 // /* */ ALWAYSDEFD(guest_GS),
                 // /* */ ALWAYSDEFD(guest_SS),
                 // /* */ ALWAYSDEFD(guest_LDT),
                 // /* */ ALWAYSDEFD(guest_GDT),
                 /* 10 */ ALWAYSDEFD(guest_EMWARN),
                 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 12 */ ALWAYSDEFD(guest_TISTART),
                 /* 13 */ ALWAYSDEFD(guest_TILEN),
                 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };


/*---------------------------------------------------------------*/
/*--- end                               guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/