1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                             guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2013 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_emnote.h"
38 #include "libvex_guest_amd64.h"
39 #include "libvex_ir.h"
40 #include "libvex.h"
41 
42 #include "main_util.h"
43 #include "main_globals.h"
44 #include "guest_generic_bb_to_IR.h"
45 #include "guest_amd64_defs.h"
46 #include "guest_generic_x87.h"
47 
48 
49 /* This file contains helper functions for amd64 guest code.
50    Calls to these functions are generated by the back end.
51    These calls are of course in the host machine code and
52    this file will be compiled to host machine code, so that
53    all makes sense.
54 
55    Only change the signatures of these helper functions very
56    carefully.  If you change the signature here, you'll have to change
57    the parameters passed to it in the IR calls constructed by
58    guest-amd64/toIR.c.
59 
60    The convention used is that all functions called from generated
61    code are named amd64g_<something>, and any function whose name lacks
62    that prefix is not called from generated code.  Note that some
63    LibVEX_* functions can however be called by VEX's client, but that
64    is not the same as calling them from VEX-generated code.
65 */
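
/* For example, toIR.c typically materialises a read of the condition codes
   as an Iex_CCall (clean call) to "amd64g_calculate_condition" with five
   I64 arguments (cond, cc_op, cc_dep1, cc_dep2, cc_ndep); the argument
   order built there and the parameter order declared below must stay in
   sync. */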
66 
67 
68 /* Set to 1 to get detailed profiling info about use of the flag
69    machinery. */
70 #define PROFILE_RFLAGS 0
71 
72 
73 /*---------------------------------------------------------------*/
74 /*--- %rflags run-time helpers.                               ---*/
75 /*---------------------------------------------------------------*/
76 
77 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
78    after imulq/mulq. */
79 
80 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
81 {
82    ULong u0, v0, w0;
83     Long u1, v1, w1, w2, t;
84    u0   = u & 0xFFFFFFFFULL;
85    u1   = u >> 32;
86    v0   = v & 0xFFFFFFFFULL;
87    v1   = v >> 32;
88    w0   = u0 * v0;
89    t    = u1 * v0 + (w0 >> 32);
90    w1   = t & 0xFFFFFFFFULL;
91    w2   = t >> 32;
92    w1   = u0 * v1 + w1;
93    *rHi = u1 * v1 + w2 + (w1 >> 32);
94    *rLo = u * v;
95 }
96 
97 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
98 {
99    ULong u0, v0, w0;
100    ULong u1, v1, w1, w2, t;
101    u0   = u & 0xFFFFFFFFULL;
102    u1   = u >> 32;
103    v0   = v & 0xFFFFFFFFULL;
104    v1   = v >> 32;
105    w0   = u0 * v0;
106    t    = u1 * v0 + (w0 >> 32);
107    w1   = t & 0xFFFFFFFFULL;
108    w2   = t >> 32;
109    w1   = u0 * v1 + w1;
110    *rHi = u1 * v1 + w2 + (w1 >> 32);
111    *rLo = u * v;
112 }
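
/* Example: mullU64(0xFFFFFFFFFFFFFFFFULL, 2ULL, &hi, &lo) leaves hi = 1 and
   lo = 0xFFFFFFFFFFFFFFFEULL, i.e. the full 128-bit product 2^65 - 2.  Both
   helpers split their operands into 32-bit halves and do a schoolbook
   multiply, so no 128-bit integer support is required from the host
   compiler. */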
113 
114 
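/* Lookup table for the parity flag: entry i is AMD64G_CC_MASK_P when the
   byte i contains an even number of 1 bits, and 0 otherwise, matching the
   x86 definition of PF (parity of the low 8 bits of the result). */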
115 static const UChar parity_table[256] = {
116     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
117     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
119     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
120     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
123     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
125     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
127     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
128     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
131     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
132     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
135     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
136     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
139     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
141     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
143     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
144     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146     AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
147     0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
148 };
149 
150 /* generalised left-shifter */
151 static inline Long lshift ( Long x, Int n )
152 {
153    if (n >= 0)
154       return x << n;
155    else
156       return x >> (-n);
157 }
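
/* e.g. lshift(x, 4) == x << 4, while lshift(x, -4) == x >> 4 (a signed
   right shift, since x is a Long).  The ACTIONS_* macros below call it with
   counts such as 8 - DATA_BITS, which go negative for the 16/32/64-bit
   cases. */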
158 
159 /* identity on ULong */
160 static inline ULong idULong ( ULong x )
161 {
162    return x;
163 }
164 
165 
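/* A note on the thunk fields the ACTIONS_* macros consume: CC_OP (the
   switch value in amd64g_calculate_rflags_all_WRK below) selects which
   ACTIONS_* body runs.  CC_DEP1 and CC_DEP2 carry op-specific dependencies:
   the two arguments for ADD/SUB/ADC/SBB, or the result (in DEP1) for the
   LOGIC, INC/DEC, shift and rotate cases.  CC_NDEP carries extra state that
   the flags depend on but the result does not, e.g. the old carry for
   ADC/SBB/INC/DEC or the old flags for ROL/ROR.  See guest_amd64_defs.h
   for the per-op encoding. */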
166 #define PREAMBLE(__data_bits)					\
167    /* const */ ULong DATA_MASK 					\
168       = __data_bits==8                                          \
169            ? 0xFFULL 					        \
170            : (__data_bits==16                                   \
171                 ? 0xFFFFULL 		                        \
172                 : (__data_bits==32                              \
173                      ? 0xFFFFFFFFULL                            \
174                      : 0xFFFFFFFFFFFFFFFFULL));                 \
175    /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
176    /* const */ ULong CC_DEP1 = cc_dep1_formal;			\
177    /* const */ ULong CC_DEP2 = cc_dep2_formal;			\
178    /* const */ ULong CC_NDEP = cc_ndep_formal;			\
179    /* Four bogus assignments, which hopefully gcc can     */	\
180    /* optimise away, and which stop it complaining about  */	\
181    /* unused variables.                                   */	\
182    SIGN_MASK = SIGN_MASK;					\
183    DATA_MASK = DATA_MASK;					\
184    CC_DEP2 = CC_DEP2;						\
185    CC_NDEP = CC_NDEP;
186 
187 
188 /*-------------------------------------------------------------*/
189 
190 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)			\
191 {								\
192    PREAMBLE(DATA_BITS);						\
193    { Long cf, pf, af, zf, sf, of;				\
194      Long argL, argR, res;					\
195      argL = CC_DEP1;						\
196      argR = CC_DEP2;						\
197      res  = argL + argR;					\
198      cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;			\
199      pf = parity_table[(UChar)res];				\
200      af = (res ^ argL ^ argR) & 0x10;				\
201      zf = ((DATA_UTYPE)res == 0) << 6;				\
202      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
203      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
204                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
205      return cf | pf | af | zf | sf | of;			\
206    }								\
207 }
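
/* Worked example for ACTIONS_ADD(8, UChar): an addb of 0xFF and 0x01 leaves
   CC_DEP1 = 0xFF and CC_DEP2 = 0x01, so res = 0x100 and the returned flags
   are cf = 1 (carry out), pf = AMD64G_CC_MASK_P (0x00 has even parity),
   af = 0x10, zf = 0x40, and sf = of = 0 -- exactly what the hardware
   produces for that addition. */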
208 
209 /*-------------------------------------------------------------*/
210 
211 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)			\
212 {								\
213    PREAMBLE(DATA_BITS);						\
214    { Long cf, pf, af, zf, sf, of;				\
215      Long argL, argR, res;					\
216      argL = CC_DEP1;						\
217      argR = CC_DEP2;						\
218      res  = argL - argR;					\
219      cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;			\
220      pf = parity_table[(UChar)res];				\
221      af = (res ^ argL ^ argR) & 0x10;				\
222      zf = ((DATA_UTYPE)res == 0) << 6;				\
223      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
224      of = lshift((argL ^ argR) & (argL ^ res),	 		\
225                  12 - DATA_BITS) & AMD64G_CC_MASK_O; 		\
226      return cf | pf | af | zf | sf | of;			\
227    }								\
228 }
229 
230 /*-------------------------------------------------------------*/
231 
232 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)			\
233 {								\
234    PREAMBLE(DATA_BITS);						\
235    { Long cf, pf, af, zf, sf, of;				\
236      Long argL, argR, oldC, res;		 		\
237      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
238      argL = CC_DEP1;						\
239      argR = CC_DEP2 ^ oldC;	       				\
240      res  = (argL + argR) + oldC;				\
241      if (oldC)							\
242         cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;		\
243      else							\
244         cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;		\
245      pf = parity_table[(UChar)res];				\
246      af = (res ^ argL ^ argR) & 0x10;				\
247      zf = ((DATA_UTYPE)res == 0) << 6;				\
248      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
249      of = lshift((argL ^ argR ^ -1) & (argL ^ res), 		\
250                   12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
251      return cf | pf | af | zf | sf | of;			\
252    }								\
253 }
254 
255 /*-------------------------------------------------------------*/
256 
257 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)			\
258 {								\
259    PREAMBLE(DATA_BITS);						\
260    { Long cf, pf, af, zf, sf, of;				\
261      Long argL, argR, oldC, res;	       			\
262      oldC = CC_NDEP & AMD64G_CC_MASK_C;				\
263      argL = CC_DEP1;						\
264      argR = CC_DEP2 ^ oldC;	       				\
265      res  = (argL - argR) - oldC;				\
266      if (oldC)							\
267         cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;		\
268      else							\
269         cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;		\
270      pf = parity_table[(UChar)res];				\
271      af = (res ^ argL ^ argR) & 0x10;				\
272      zf = ((DATA_UTYPE)res == 0) << 6;				\
273      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
274      of = lshift((argL ^ argR) & (argL ^ res), 			\
275                  12 - DATA_BITS) & AMD64G_CC_MASK_O;		\
276      return cf | pf | af | zf | sf | of;			\
277    }								\
278 }
279 
280 /*-------------------------------------------------------------*/
281 
282 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)			\
283 {								\
284    PREAMBLE(DATA_BITS);						\
285    { Long cf, pf, af, zf, sf, of;				\
286      cf = 0;							\
287      pf = parity_table[(UChar)CC_DEP1];				\
288      af = 0;							\
289      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
290      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
291      of = 0;							\
292      return cf | pf | af | zf | sf | of;			\
293    }								\
294 }
295 
296 /*-------------------------------------------------------------*/
297 
298 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE)			\
299 {								\
300    PREAMBLE(DATA_BITS);						\
301    { Long cf, pf, af, zf, sf, of;				\
302      Long argL, argR, res;					\
303      res  = CC_DEP1;						\
304      argL = res - 1;						\
305      argR = 1;							\
306      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
307      pf = parity_table[(UChar)res];				\
308      af = (res ^ argL ^ argR) & 0x10;				\
309      zf = ((DATA_UTYPE)res == 0) << 6;				\
310      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
311      of = ((res & DATA_MASK) == SIGN_MASK) << 11;		\
312      return cf | pf | af | zf | sf | of;			\
313    }								\
314 }
315 
316 /*-------------------------------------------------------------*/
317 
318 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)			\
319 {								\
320    PREAMBLE(DATA_BITS);						\
321    { Long cf, pf, af, zf, sf, of;				\
322      Long argL, argR, res;					\
323      res  = CC_DEP1;						\
324      argL = res + 1;						\
325      argR = 1;							\
326      cf = CC_NDEP & AMD64G_CC_MASK_C;				\
327      pf = parity_table[(UChar)res];				\
328      af = (res ^ argL ^ argR) & 0x10;				\
329      zf = ((DATA_UTYPE)res == 0) << 6;				\
330      sf = lshift(res, 8 - DATA_BITS) & 0x80;			\
331      of = ((res & DATA_MASK) 					\
332           == ((ULong)SIGN_MASK - 1)) << 11;			\
333      return cf | pf | af | zf | sf | of;			\
334    }								\
335 }
336 
337 /*-------------------------------------------------------------*/
338 
339 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)			\
340 {								\
341    PREAMBLE(DATA_BITS);						\
342    { Long cf, pf, af, zf, sf, of;				\
343      cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;	\
344      pf = parity_table[(UChar)CC_DEP1];				\
345      af = 0; /* undefined */					\
346      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
347      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
348      /* of is defined if shift count == 1 */			\
349      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) 		\
350           & AMD64G_CC_MASK_O;					\
351      return cf | pf | af | zf | sf | of;			\
352    }								\
353 }
354 
355 /*-------------------------------------------------------------*/
356 
357 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)			\
358 {								\
359    PREAMBLE(DATA_BITS);  					\
360    { Long cf, pf, af, zf, sf, of;				\
361      cf = CC_DEP2 & 1;						\
362      pf = parity_table[(UChar)CC_DEP1];				\
363      af = 0; /* undefined */					\
364      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
365      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
366      /* of is defined if shift count == 1 */			\
367      of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)		\
368           & AMD64G_CC_MASK_O;					\
369      return cf | pf | af | zf | sf | of;			\
370    }								\
371 }
372 
373 /*-------------------------------------------------------------*/
374 
375 /* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
376 /* DEP1 = result, NDEP = old flags */
377 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)			\
378 {								\
379    PREAMBLE(DATA_BITS);						\
380    { Long fl 							\
381         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
382           | (AMD64G_CC_MASK_C & CC_DEP1)			\
383           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,  		\
384                                       11-(DATA_BITS-1)) 	\
385                      ^ lshift(CC_DEP1, 11)));			\
386      return fl;							\
387    }								\
388 }
389 
390 /*-------------------------------------------------------------*/
391 
392 /* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
393 /* DEP1 = result, NDEP = old flags */
394 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)			\
395 {								\
396    PREAMBLE(DATA_BITS);						\
397    { Long fl 							\
398         = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))	\
399           | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))	\
400           | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, 		\
401                                       11-(DATA_BITS-1)) 	\
402                      ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));	\
403      return fl;							\
404    }								\
405 }
406 
407 /*-------------------------------------------------------------*/
408 
409 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
410                                 DATA_U2TYPE, NARROWto2U)        \
411 {                                                               \
412    PREAMBLE(DATA_BITS);                                         \
413    { Long cf, pf, af, zf, sf, of;                               \
414      DATA_UTYPE  hi;                                            \
415      DATA_UTYPE  lo                                             \
416         = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
417                      * ((DATA_UTYPE)CC_DEP2) );                 \
418      DATA_U2TYPE rr                                             \
419         = NARROWto2U(                                           \
420              ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
421              * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
422      hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
423      cf = (hi != 0);                                            \
424      pf = parity_table[(UChar)lo];                              \
425      af = 0; /* undefined */                                    \
426      zf = (lo == 0) << 6;                                       \
427      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
428      of = cf << 11;                                             \
429      return cf | pf | af | zf | sf | of;                        \
430    }								\
431 }
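
/* For unsigned multiplies, x86 defines CF = OF = 1 exactly when the high
   half of the double-length product is nonzero, which is what the hi/cf/of
   computation above implements; SF, ZF, AF and PF are architecturally
   undefined after MUL and are simply derived from the low half here. */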
432 
433 /*-------------------------------------------------------------*/
434 
435 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
436                                 DATA_S2TYPE, NARROWto2S)        \
437 {                                                               \
438    PREAMBLE(DATA_BITS);                                         \
439    { Long cf, pf, af, zf, sf, of;                               \
440      DATA_STYPE  hi;                                            \
441      DATA_STYPE  lo                                             \
442         = NARROWtoS( ((DATA_STYPE)CC_DEP1)                      \
443                      * ((DATA_STYPE)CC_DEP2) );                 \
444      DATA_S2TYPE rr                                             \
445         = NARROWto2S(                                           \
446              ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
447              * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
448      hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
449      cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
450      pf = parity_table[(UChar)lo];                              \
451      af = 0; /* undefined */                                    \
452      zf = (lo == 0) << 6;                                       \
453      sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
454      of = cf << 11;                                             \
455      return cf | pf | af | zf | sf | of;                        \
456    }								\
457 }
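
/* The signed version follows IMUL's rule: CF = OF = 1 when the
   double-length product does not fit in the lower half, i.e. when the high
   half is not just the sign extension of the low half -- hence the
   (hi != (lo >> (DATA_BITS-1))) test above. */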
458 
459 /*-------------------------------------------------------------*/
460 
461 #define ACTIONS_UMULQ                                           \
462 {                                                               \
463    PREAMBLE(64);                                                \
464    { Long cf, pf, af, zf, sf, of;                               \
465      ULong lo, hi;                                              \
466      mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
467      cf = (hi != 0);                                            \
468      pf = parity_table[(UChar)lo];                              \
469      af = 0; /* undefined */                                    \
470      zf = (lo == 0) << 6;                                       \
471      sf = lshift(lo, 8 - 64) & 0x80;                            \
472      of = cf << 11;                                             \
473      return cf | pf | af | zf | sf | of;                        \
474    }								\
475 }
476 
477 /*-------------------------------------------------------------*/
478 
479 #define ACTIONS_SMULQ                                           \
480 {                                                               \
481    PREAMBLE(64);                                                \
482    { Long cf, pf, af, zf, sf, of;                               \
483      Long lo, hi;                                               \
484      mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
485      cf = (hi != (lo >>/*s*/ (64-1)));                          \
486      pf = parity_table[(UChar)lo];                              \
487      af = 0; /* undefined */                                    \
488      zf = (lo == 0) << 6;                                       \
489      sf = lshift(lo, 8 - 64) & 0x80;                            \
490      of = cf << 11;                                             \
491      return cf | pf | af | zf | sf | of;                        \
492    }								\
493 }
494 
495 /*-------------------------------------------------------------*/
496 
497 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)			\
498 {								\
499    PREAMBLE(DATA_BITS);						\
500    { Long cf, pf, af, zf, sf, of;				\
501      cf = 0;							\
502      pf = 0;							\
503      af = 0;							\
504      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
505      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
506      of = 0;							\
507      return cf | pf | af | zf | sf | of;			\
508    }								\
509 }
510 
511 /*-------------------------------------------------------------*/
512 
513 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)			\
514 {								\
515    PREAMBLE(DATA_BITS);						\
516    { Long cf, pf, af, zf, sf, of;				\
517      cf = ((DATA_UTYPE)CC_DEP2 != 0);				\
518      pf = 0;							\
519      af = 0;							\
520      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
521      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
522      of = 0;							\
523      return cf | pf | af | zf | sf | of;			\
524    }								\
525 }
526 
527 /*-------------------------------------------------------------*/
528 
529 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)			\
530 {								\
531    PREAMBLE(DATA_BITS);						\
532    { Long cf, pf, af, zf, sf, of;				\
533      cf = ((DATA_UTYPE)CC_DEP2 == 0);				\
534      pf = 0;							\
535      af = 0;							\
536      zf = 0;							\
537      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
538      of = 0;							\
539      return cf | pf | af | zf | sf | of;			\
540    }								\
541 }
542 
543 /*-------------------------------------------------------------*/
544 
545 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)			\
546 {								\
547    PREAMBLE(DATA_BITS);						\
548    { Long cf, pf, af, zf, sf, of;				\
549      cf = ((DATA_UTYPE)CC_DEP2 == 0);				\
550      pf = 0;							\
551      af = 0;							\
552      zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;			\
553      sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;		\
554      of = 0;							\
555      return cf | pf | af | zf | sf | of;			\
556    }								\
557 }
558 
559 /*-------------------------------------------------------------*/
560 
561 
562 #if PROFILE_RFLAGS
563 
564 static Bool initted     = False;
565 
566 /* C flag, fast route */
567 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
568 /* C flag, slow route */
569 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
570 /* table for calculate_cond */
571 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
572 /* total entry counts for calc_all, calc_c, calc_cond. */
573 static UInt n_calc_all  = 0;
574 static UInt n_calc_c    = 0;
575 static UInt n_calc_cond = 0;
576 
577 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
578 
579 
580 static void showCounts ( void )
581 {
582    Int op, co;
583    HChar ch;
584    vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
585               n_calc_all, n_calc_cond, n_calc_c);
586 
587    vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
588               "    S   NS    P   NP    L   NL   LE  NLE\n");
589    vex_printf("     -----------------------------------------------------"
590               "----------------------------------------\n");
591    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
592 
593       ch = ' ';
594       if (op > 0 && (op-1) % 4 == 0)
595          ch = 'B';
596       if (op > 0 && (op-1) % 4 == 1)
597          ch = 'W';
598       if (op > 0 && (op-1) % 4 == 2)
599          ch = 'L';
600       if (op > 0 && (op-1) % 4 == 3)
601          ch = 'Q';
602 
603       vex_printf("%2d%c: ", op, ch);
604       vex_printf("%6u ", tabc_slow[op]);
605       vex_printf("%6u ", tabc_fast[op]);
606       for (co = 0; co < 16; co++) {
607          Int n = tab_cond[op][co];
608          if (n >= 1000) {
609             vex_printf(" %3dK", n / 1000);
610          } else
611          if (n >= 0) {
612             vex_printf(" %3d ", n );
613          } else {
614             vex_printf("     ");
615          }
616       }
617       vex_printf("\n");
618    }
619    vex_printf("\n");
620 }
621 
622 static void initCounts ( void )
623 {
624    Int op, co;
625    initted = True;
626    for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
627       tabc_fast[op] = tabc_slow[op] = 0;
628       for (co = 0; co < 16; co++)
629          tab_cond[op][co] = 0;
630    }
631 }
632 
633 #endif /* PROFILE_RFLAGS */
634 
635 
636 /* Calculate all 6 flags from the supplied thunk parameters.
637    Worker function, not called directly from generated code; generated
638    code calls the clean-helper wrappers below instead. */
639 static
640 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
641                                         ULong cc_dep1_formal,
642                                         ULong cc_dep2_formal,
643                                         ULong cc_ndep_formal )
644 {
645    switch (cc_op) {
646       case AMD64G_CC_OP_COPY:
647          return cc_dep1_formal
648                 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
649                    | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
650 
651       case AMD64G_CC_OP_ADDB:   ACTIONS_ADD( 8,  UChar  );
652       case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
653       case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
654       case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );
655 
656       case AMD64G_CC_OP_ADCB:   ACTIONS_ADC( 8,  UChar  );
657       case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
658       case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
659       case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );
660 
661       case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
662       case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
663       case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
664       case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );
665 
666       case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
667       case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
668       case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
669       case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );
670 
671       case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
672       case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
673       case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
674       case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );
675 
676       case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
677       case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
678       case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
679       case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );
680 
681       case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
682       case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
683       case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
684       case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );
685 
686       case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
687       case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
688       case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
689       case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );
690 
691       case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
692       case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
693       case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
694       case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );
695 
696       case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
697       case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
698       case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
699       case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );
700 
701       case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
702       case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
703       case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
704       case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );
705 
706       case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
707                                                   UShort, toUShort );
708       case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
709                                                   UInt,   toUInt );
710       case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
711                                                   ULong,  idULong );
712 
713       case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;
714 
715       case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
716                                                   Short,  toUShort );
717       case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
718                                                   Int,    toUInt   );
719       case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
720                                                   Long,   idULong );
721 
722       case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;
723 
724       case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt   );
725       case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong  );
726 
727       case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt   );
728       case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong  );
729 
730       case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt   );
731       case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong  );
732 
733       case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt   );
734       case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong  );
735 
736       default:
737          /* shouldn't really make these calls from generated code */
738          vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
739                     "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
740                     cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
741          vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
742    }
743 }
744 
745 
746 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
747 /* Calculate all 6 flags from the supplied thunk parameters. */
748 ULong amd64g_calculate_rflags_all ( ULong cc_op,
749                                     ULong cc_dep1,
750                                     ULong cc_dep2,
751                                     ULong cc_ndep )
752 {
753 #  if PROFILE_RFLAGS
754    if (!initted) initCounts();
755    n_calc_all++;
756    if (SHOW_COUNTS_NOW) showCounts();
757 #  endif
758    return
759       amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
760 }
761 
762 
763 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
764 /* Calculate just the carry flag from the supplied thunk parameters. */
765 ULong amd64g_calculate_rflags_c ( ULong cc_op,
766                                   ULong cc_dep1,
767                                   ULong cc_dep2,
768                                   ULong cc_ndep )
769 {
770 #  if PROFILE_RFLAGS
771    if (!initted) initCounts();
772    n_calc_c++;
773    tabc_fast[cc_op]++;
774    if (SHOW_COUNTS_NOW) showCounts();
775 #  endif
776 
777    /* Fast-case some common ones. */
778    switch (cc_op) {
779       case AMD64G_CC_OP_COPY:
780          return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
781       case AMD64G_CC_OP_LOGICQ:
782       case AMD64G_CC_OP_LOGICL:
783       case AMD64G_CC_OP_LOGICW:
784       case AMD64G_CC_OP_LOGICB:
785          return 0;
786 	 //      case AMD64G_CC_OP_SUBL:
787 	 //         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
788 	 //                   ? AMD64G_CC_MASK_C : 0;
789 	 //      case AMD64G_CC_OP_SUBW:
790 	 //         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
791 	 //                   ? AMD64G_CC_MASK_C : 0;
792 	 //      case AMD64G_CC_OP_SUBB:
793 	 //         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
794 	 //                   ? AMD64G_CC_MASK_C : 0;
795 	 //      case AMD64G_CC_OP_INCL:
796 	 //      case AMD64G_CC_OP_DECL:
797 	 //         return cc_ndep & AMD64G_CC_MASK_C;
798       default:
799          break;
800    }
801 
802 #  if PROFILE_RFLAGS
803    tabc_fast[cc_op]--;
804    tabc_slow[cc_op]++;
805 #  endif
806 
807    return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
808           & AMD64G_CC_MASK_C;
809 }
810 
811 
812 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
813 /* returns 1 or 0 */
814 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
815                                    ULong cc_op,
816                                    ULong cc_dep1,
817                                    ULong cc_dep2,
818                                    ULong cc_ndep )
819 {
820    ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
821                                                   cc_dep2, cc_ndep);
822    ULong of,sf,zf,cf,pf;
823    ULong inv = cond & 1;
824 
825 #  if PROFILE_RFLAGS
826    if (!initted) initCounts();
827    tab_cond[cc_op][cond]++;
828    n_calc_cond++;
829    if (SHOW_COUNTS_NOW) showCounts();
830 #  endif
831 
832    switch (cond) {
833       case AMD64CondNO:
834       case AMD64CondO: /* OF == 1 */
835          of = rflags >> AMD64G_CC_SHIFT_O;
836          return 1 & (inv ^ of);
837 
838       case AMD64CondNZ:
839       case AMD64CondZ: /* ZF == 1 */
840          zf = rflags >> AMD64G_CC_SHIFT_Z;
841          return 1 & (inv ^ zf);
842 
843       case AMD64CondNB:
844       case AMD64CondB: /* CF == 1 */
845          cf = rflags >> AMD64G_CC_SHIFT_C;
846          return 1 & (inv ^ cf);
847          break;
848 
849       case AMD64CondNBE:
850       case AMD64CondBE: /* (CF or ZF) == 1 */
851          cf = rflags >> AMD64G_CC_SHIFT_C;
852          zf = rflags >> AMD64G_CC_SHIFT_Z;
853          return 1 & (inv ^ (cf | zf));
854          break;
855 
856       case AMD64CondNS:
857       case AMD64CondS: /* SF == 1 */
858          sf = rflags >> AMD64G_CC_SHIFT_S;
859          return 1 & (inv ^ sf);
860 
861       case AMD64CondNP:
862       case AMD64CondP: /* PF == 1 */
863          pf = rflags >> AMD64G_CC_SHIFT_P;
864          return 1 & (inv ^ pf);
865 
866       case AMD64CondNL:
867       case AMD64CondL: /* (SF xor OF) == 1 */
868          sf = rflags >> AMD64G_CC_SHIFT_S;
869          of = rflags >> AMD64G_CC_SHIFT_O;
870          return 1 & (inv ^ (sf ^ of));
871          break;
872 
873       case AMD64CondNLE:
874       case AMD64CondLE: /* ((SF xor OF) or ZF)  == 1 */
875          sf = rflags >> AMD64G_CC_SHIFT_S;
876          of = rflags >> AMD64G_CC_SHIFT_O;
877          zf = rflags >> AMD64G_CC_SHIFT_Z;
878          return 1 & (inv ^ ((sf ^ of) | zf));
879          break;
880 
881       default:
882          /* shouldn't really make these calls from generated code */
883          vex_printf("amd64g_calculate_condition"
884                     "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
885                     cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
886          vpanic("amd64g_calculate_condition");
887    }
888 }
889 
890 
891 /* VISIBLE TO LIBVEX CLIENT */
892 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
893 {
894    ULong rflags = amd64g_calculate_rflags_all_WRK(
895                      vex_state->guest_CC_OP,
896                      vex_state->guest_CC_DEP1,
897                      vex_state->guest_CC_DEP2,
898                      vex_state->guest_CC_NDEP
899                   );
900    Long dflag = vex_state->guest_DFLAG;
901    vassert(dflag == 1 || dflag == -1);
902    if (dflag == -1)
903       rflags |= (1<<10);
904    if (vex_state->guest_IDFLAG == 1)
905       rflags |= (1<<21);
906    if (vex_state->guest_ACFLAG == 1)
907       rflags |= (1<<18);
908 
909    return rflags;
910 }
911 
912 /* VISIBLE TO LIBVEX CLIENT */
913 void
914 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
915                                /*MOD*/VexGuestAMD64State* vex_state )
916 {
917    ULong oszacp = amd64g_calculate_rflags_all_WRK(
918                      vex_state->guest_CC_OP,
919                      vex_state->guest_CC_DEP1,
920                      vex_state->guest_CC_DEP2,
921                      vex_state->guest_CC_NDEP
922                   );
923    if (new_carry_flag & 1) {
924       oszacp |= AMD64G_CC_MASK_C;
925    } else {
926       oszacp &= ~AMD64G_CC_MASK_C;
927    }
928    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
929    vex_state->guest_CC_DEP1 = oszacp;
930    vex_state->guest_CC_DEP2 = 0;
931    vex_state->guest_CC_NDEP = 0;
932 }
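
/* A sketch of how a LibVEX client might use the two entry points above,
   assuming it holds a VexGuestAMD64State (here called gst) that it is
   allowed to modify:

      ULong fl = LibVEX_GuestAMD64_get_rflags(&gst);   // read %rflags
      LibVEX_GuestAMD64_put_rflag_c(1, &gst);          // force CF = 1

   After put_rflag_c the thunk is left in AMD64G_CC_OP_COPY form, so
   subsequent reads see the updated carry bit. */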
933 
934 
935 /*---------------------------------------------------------------*/
936 /*--- %rflags translation-time function specialisers.         ---*/
937 /*--- These help iropt specialise calls the above run-time    ---*/
938 /*--- %rflags functions.                                      ---*/
939 /*---------------------------------------------------------------*/
940 
941 /* Used by the optimiser to try specialisations.  Returns an
942    equivalent expression, or NULL if none. */
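
/* For instance, "cmpq %rsi, %rdi ; je ..." leaves a thunk with
   cc_op = AMD64G_CC_OP_SUBQ and cond = AMD64CondZ; the SUBQ/CondZ case in
   guest_amd64_spechelper below collapses the whole flag computation into a
   single CmpEQ64(cc_dep1, cc_dep2). */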
943 
944 static Bool isU64 ( IRExpr* e, ULong n )
945 {
946    return toBool( e->tag == Iex_Const
947                   && e->Iex.Const.con->tag == Ico_U64
948                   && e->Iex.Const.con->Ico.U64 == n );
949 }
950 
951 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
952                                  IRExpr** args,
953                                  IRStmt** precedingStmts,
954                                  Int      n_precedingStmts )
955 {
956 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
957 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
958 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
959 #  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
960 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
961 
962    Int i, arity = 0;
963    for (i = 0; args[i]; i++)
964       arity++;
965 #  if 0
966    vex_printf("spec request:\n");
967    vex_printf("   %s  ", function_name);
968    for (i = 0; i < arity; i++) {
969       vex_printf("  ");
970       ppIRExpr(args[i]);
971    }
972    vex_printf("\n");
973 #  endif
974 
975    /* --------- specialising "amd64g_calculate_condition" --------- */
976 
977    if (vex_streq(function_name, "amd64g_calculate_condition")) {
978       /* specialise calls to above "calculate condition" function */
979       IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
980       vassert(arity == 5);
981       cond    = args[0];
982       cc_op   = args[1];
983       cc_dep1 = args[2];
984       cc_dep2 = args[3];
985 
986       /*---------------- ADDQ ----------------*/
987 
988       if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
989          /* long long add, then Z --> test (dst+src == 0) */
990          return unop(Iop_1Uto64,
991                      binop(Iop_CmpEQ64,
992                            binop(Iop_Add64, cc_dep1, cc_dep2),
993                            mkU64(0)));
994       }
995 
996       /*---------------- SUBQ ----------------*/
997 
998       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
999          /* long long sub/cmp, then Z --> test dst==src */
1000          return unop(Iop_1Uto64,
1001                      binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1002       }
1003       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1004          /* long long sub/cmp, then NZ --> test dst!=src */
1005          return unop(Iop_1Uto64,
1006                      binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1007       }
1008 
1009       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1010          /* long long sub/cmp, then L (signed less than)
1011             --> test dst <s src */
1012          return unop(Iop_1Uto64,
1013                      binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1014       }
1015 
1016       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1017          /* long long sub/cmp, then B (unsigned less than)
1018             --> test dst <u src */
1019          return unop(Iop_1Uto64,
1020                      binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1021       }
1022       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1023          /* long long sub/cmp, then NB (unsigned greater than or equal)
1024             --> test src <=u dst */
1025          /* Note, args are opposite way round from the usual */
1026          return unop(Iop_1Uto64,
1027                      binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1028       }
1029 
1030       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1031          /* long long sub/cmp, then NLE (signed greater than)
1032             --> test !(dst <=s src)
1033             --> test (dst >s src)
1034             --> test (src <s dst) */
1035          return unop(Iop_1Uto64,
1036                      binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1037 
1038       }
1039 
1040       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1041          /* long long sub/cmp, then BE (unsigned less than or equal)
1042             --> test dst <=u src */
1043          return unop(Iop_1Uto64,
1044                      binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1045       }
1046       if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1047          /* long long sub/cmp, then NBE (unsigned greater than)
1048             --> test !(dst <=u src) */
1049          return binop(Iop_Xor64,
1050                       unop(Iop_1Uto64,
1051                            binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1052                       mkU64(1));
1053       }
1054 
1055       /*---------------- SUBL ----------------*/
1056 
1057       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1058          /* long sub/cmp, then Z --> test dst==src */
1059          return unop(Iop_1Uto64,
1060                      binop(Iop_CmpEQ32,
1061                            unop(Iop_64to32, cc_dep1),
1062                            unop(Iop_64to32, cc_dep2)));
1063       }
1064       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1065          /* long sub/cmp, then NZ --> test dst!=src */
1066          return unop(Iop_1Uto64,
1067                      binop(Iop_CmpNE32,
1068                            unop(Iop_64to32, cc_dep1),
1069                            unop(Iop_64to32, cc_dep2)));
1070       }
1071 
1072       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1073          /* long sub/cmp, then L (signed less than)
1074             --> test dst <s src */
1075          return unop(Iop_1Uto64,
1076                      binop(Iop_CmpLT32S,
1077                            unop(Iop_64to32, cc_dep1),
1078                            unop(Iop_64to32, cc_dep2)));
1079       }
1080 
1081       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1082          /* long sub/cmp, then LE (signed less than or equal)
1083             --> test dst <=s src */
1084          return unop(Iop_1Uto64,
1085                      binop(Iop_CmpLE32S,
1086                            unop(Iop_64to32, cc_dep1),
1087                            unop(Iop_64to32, cc_dep2)));
1088 
1089       }
1090       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1091          /* long sub/cmp, then NLE (signed greater than)
1092             --> test !(dst <=s src)
1093             --> test (dst >s src)
1094             --> test (src <s dst) */
1095          return unop(Iop_1Uto64,
1096                      binop(Iop_CmpLT32S,
1097                            unop(Iop_64to32, cc_dep2),
1098                            unop(Iop_64to32, cc_dep1)));
1099 
1100       }
1101 
1102       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1103          /* long sub/cmp, then BE (unsigned less than or equal)
1104             --> test dst <=u src */
1105          return unop(Iop_1Uto64,
1106                      binop(Iop_CmpLE32U,
1107                            unop(Iop_64to32, cc_dep1),
1108                            unop(Iop_64to32, cc_dep2)));
1109       }
1110       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1111          /* long sub/cmp, then NBE (unsigned greater than)
1112             --> test src <u dst */
1113          /* Note, args are opposite way round from the usual */
1114          return unop(Iop_1Uto64,
1115                      binop(Iop_CmpLT32U,
1116                            unop(Iop_64to32, cc_dep2),
1117                            unop(Iop_64to32, cc_dep1)));
1118       }
1119 
1120       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1121          /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
1122          return unop(Iop_1Uto64,
1123                      binop(Iop_CmpLT32S,
1124                            binop(Iop_Sub32,
1125                                  unop(Iop_64to32, cc_dep1),
1126                                  unop(Iop_64to32, cc_dep2)),
1127                            mkU32(0)));
1128       }
1129 
1130       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1131          /* long sub/cmp, then B (unsigned less than)
1132             --> test dst <u src */
1133          return unop(Iop_1Uto64,
1134                      binop(Iop_CmpLT32U,
1135                            unop(Iop_64to32, cc_dep1),
1136                            unop(Iop_64to32, cc_dep2)));
1137       }
1138 
1139       /*---------------- SUBW ----------------*/
1140 
1141       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1142          /* word sub/cmp, then Z --> test dst==src */
1143          return unop(Iop_1Uto64,
1144                      binop(Iop_CmpEQ16,
1145                            unop(Iop_64to16,cc_dep1),
1146                            unop(Iop_64to16,cc_dep2)));
1147       }
1148       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1149          /* word sub/cmp, then NZ --> test dst!=src */
1150          return unop(Iop_1Uto64,
1151                      binop(Iop_CmpNE16,
1152                            unop(Iop_64to16,cc_dep1),
1153                            unop(Iop_64to16,cc_dep2)));
1154       }
1155 
1156       if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1157          /* word sub/cmp, then LE (signed less than or equal)
1158             --> test dst <=s src */
1159          return unop(Iop_1Uto64,
1160                      binop(Iop_CmpLE64S,
1161                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1162                            binop(Iop_Shl64,cc_dep2,mkU8(48))));
1163 
1164       }
1165 
1166       /*---------------- SUBB ----------------*/
1167 
1168       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1169          /* byte sub/cmp, then Z --> test dst==src */
1170          return unop(Iop_1Uto64,
1171                      binop(Iop_CmpEQ8,
1172                            unop(Iop_64to8,cc_dep1),
1173                            unop(Iop_64to8,cc_dep2)));
1174       }
1175       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1176          /* byte sub/cmp, then NZ --> test dst!=src */
1177          return unop(Iop_1Uto64,
1178                      binop(Iop_CmpNE8,
1179                            unop(Iop_64to8,cc_dep1),
1180                            unop(Iop_64to8,cc_dep2)));
1181       }
1182 
1183       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1184          /* byte sub/cmp, then BE (unsigned less than or equal)
1185             --> test dst <=u src */
1186          return unop(Iop_1Uto64,
1187                      binop(Iop_CmpLE64U,
1188                            binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1189                            binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1190       }
1191 
1192       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1193                                           && isU64(cc_dep2, 0)) {
1194          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1195                                          --> test dst <s 0
1196                                          --> (ULong)dst[7]
1197             This is yet another scheme by which gcc figures out if the
1198             top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
1199          /* Note: isU64(cc_dep2, 0) is correct, even though this is
1200             for an 8-bit comparison, since the args to the helper
1201             function are always U64s. */
1202          return binop(Iop_And64,
1203                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1204                       mkU64(1));
1205       }
1206       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1207                                           && isU64(cc_dep2, 0)) {
1208          /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1209                                           --> test !(dst <s 0)
1210                                           --> (ULong) !dst[7]
1211          */
1212          return binop(Iop_Xor64,
1213                       binop(Iop_And64,
1214                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1215                             mkU64(1)),
1216                       mkU64(1));
1217       }
1218 
1219       /*---------------- LOGICQ ----------------*/
1220 
1221       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1222          /* long long and/or/xor, then Z --> test dst==0 */
1223          return unop(Iop_1Uto64,
1224                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1225       }
1226       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1227          /* long long and/or/xor, then NZ --> test dst!=0 */
1228          return unop(Iop_1Uto64,
1229                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1230       }
1231 
1232       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1233          /* long long and/or/xor, then L
1234             LOGIC sets SF and ZF according to the
1235             result and makes OF be zero.  L computes SF ^ OF, but
1236             OF is zero, so this reduces to SF -- which will be 1 iff
1237             the result is < signed 0.  Hence ...
1238          */
1239          return unop(Iop_1Uto64,
1240                      binop(Iop_CmpLT64S,
1241                            cc_dep1,
1242                            mkU64(0)));
1243       }
1244 
1245       /*---------------- LOGICL ----------------*/
1246 
1247       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1248          /* long and/or/xor, then Z --> test dst==0 */
1249          return unop(Iop_1Uto64,
1250                      binop(Iop_CmpEQ32,
1251                            unop(Iop_64to32, cc_dep1),
1252                            mkU32(0)));
1253       }
1254       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1255          /* long and/or/xor, then NZ --> test dst!=0 */
1256          return unop(Iop_1Uto64,
1257                      binop(Iop_CmpNE32,
1258                            unop(Iop_64to32, cc_dep1),
1259                            mkU32(0)));
1260       }
1261 
1262       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1263          /* long and/or/xor, then LE
1264             This is pretty subtle.  LOGIC sets SF and ZF according to the
1265             result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
1266             OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1267             the result is <=signed 0.  Hence ...
1268          */
1269          return unop(Iop_1Uto64,
1270                      binop(Iop_CmpLE32S,
1271                            unop(Iop_64to32, cc_dep1),
1272                            mkU32(0)));
1273       }
1274 
1275       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1276          /* long and/or/xor, then S --> (ULong)result[31] */
1277          return binop(Iop_And64,
1278                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1279                       mkU64(1));
1280       }
1281       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1282          /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1283          return binop(Iop_Xor64,
1284                 binop(Iop_And64,
1285                       binop(Iop_Shr64, cc_dep1, mkU8(31)),
1286                       mkU64(1)),
1287                 mkU64(1));
1288       }
1289 
1290       /*---------------- LOGICW ----------------*/
1291 
1292       if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1293          /* word and/or/xor, then Z --> test dst==0 */
1294          return unop(Iop_1Uto64,
1295                      binop(Iop_CmpEQ64,
1296                            binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1297                            mkU64(0)));
1298       }
1299       if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1300          /* word and/or/xor, then NZ --> test dst!=0 */
1301          return unop(Iop_1Uto64,
1302                      binop(Iop_CmpNE64,
1303                            binop(Iop_And64, cc_dep1, mkU64(0xFFFF)),
1304                            mkU64(0)));
1305       }
1306 
1307       /*---------------- LOGICB ----------------*/
1308 
1309       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1310          /* byte and/or/xor, then Z --> test dst==0 */
1311          return unop(Iop_1Uto64,
1312                      binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
1313                                         mkU64(0)));
1314       }
1315       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1316          /* byte and/or/xor, then NZ --> test dst!=0 */
1317          return unop(Iop_1Uto64,
1318                      binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
1319                                         mkU64(0)));
1320       }
1321 
1322       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1323          /* this is an idiom gcc sometimes uses to find out if the top
1324             bit of a byte register is set: eg testb %al,%al; js ..
1325             Since it just depends on the top bit of the byte, extract
1326             that bit and explicitly get rid of all the rest.  This
1327             helps memcheck avoid false positives in the case where any
1328             of the other bits in the byte are undefined. */
1329          /* byte and/or/xor, then S --> (UInt)result[7] */
1330          return binop(Iop_And64,
1331                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
1332                       mkU64(1));
1333       }
1334       if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1335          /* byte and/or/xor, then NS --> (UInt)!result[7] */
1336          return binop(Iop_Xor64,
1337                       binop(Iop_And64,
1338                             binop(Iop_Shr64,cc_dep1,mkU8(7)),
1339                             mkU64(1)),
1340                       mkU64(1));
1341       }
1342 
1343       /*---------------- INCB ----------------*/
1344 
1345       if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1346          /* 8-bit inc, then LE --> sign bit of the arg */
1347          return binop(Iop_And64,
1348                       binop(Iop_Shr64,
1349                             binop(Iop_Sub64, cc_dep1, mkU64(1)),
1350                             mkU8(7)),
1351                       mkU64(1));
1352       }
1353 
1354       /*---------------- INCW ----------------*/
1355 
1356       if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1357          /* 16-bit inc, then Z --> test dst == 0 */
1358          return unop(Iop_1Uto64,
1359                      binop(Iop_CmpEQ64,
1360                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1361                            mkU64(0)));
1362       }
1363 
1364       /*---------------- DECL ----------------*/
1365 
1366       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1367          /* dec L, then Z --> test dst == 0 */
1368          return unop(Iop_1Uto64,
1369                      binop(Iop_CmpEQ32,
1370                            unop(Iop_64to32, cc_dep1),
1371                            mkU32(0)));
1372       }
1373 
1374       /*---------------- DECW ----------------*/
1375 
1376       if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1377          /* 16-bit dec, then NZ --> test dst != 0 */
1378          return unop(Iop_1Uto64,
1379                      binop(Iop_CmpNE64,
1380                            binop(Iop_Shl64,cc_dep1,mkU8(48)),
1381                            mkU64(0)));
1382       }
1383 
1384       /*---------------- COPY ----------------*/
1385       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1386          jbe" for example. */
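      /* With OP_COPY the thunk's DEP1 field holds the rflags value
         itself; comisd/ucomisd report their result in ZF, PF and CF
         (all three set for an unordered compare), so these cases
         simply pick the relevant bits back out of cc_dep1. */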
1387 
1388       if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
1389           (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1390          /* COPY, then BE --> extract C and Z from dep1, and test (C
1391             or Z == 1). */
1392          /* COPY, then NBE --> extract C and Z from dep1, and test (C
1393             or Z == 0). */
1394          ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
1395          return
1396             unop(
1397                Iop_1Uto64,
1398                binop(
1399                   Iop_CmpEQ64,
1400                   binop(
1401                      Iop_And64,
1402                      binop(
1403                         Iop_Or64,
1404                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1405                         binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
1406                      ),
1407                      mkU64(1)
1408                   ),
1409                   mkU64(nnn)
1410                )
1411             );
1412       }
1413 
1414       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
1415          /* COPY, then B --> extract C from dep1, and test (C == 1). */
1416          return
1417             unop(
1418                Iop_1Uto64,
1419                binop(
1420                   Iop_CmpNE64,
1421                   binop(
1422                      Iop_And64,
1423                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
1424                      mkU64(1)
1425                   ),
1426                   mkU64(0)
1427                )
1428             );
1429       }
1430 
1431       if (isU64(cc_op, AMD64G_CC_OP_COPY)
1432           && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
1433          /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
1434          /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
1435          UInt nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
1436          return
1437             unop(
1438                Iop_1Uto64,
1439                binop(
1440                   Iop_CmpEQ64,
1441                   binop(
1442                      Iop_And64,
1443                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
1444                      mkU64(1)
1445                   ),
1446                   mkU64(nnn)
1447                )
1448             );
1449       }
1450 
1451       if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
1452          /* COPY, then P --> extract P from dep1, and test (P == 1). */
1453          return
1454             unop(
1455                Iop_1Uto64,
1456                binop(
1457                   Iop_CmpNE64,
1458                   binop(
1459                      Iop_And64,
1460                      binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
1461                      mkU64(1)
1462                   ),
1463                   mkU64(0)
1464                )
1465             );
1466       }
1467 
1468       return NULL;
1469    }
1470 
1471    /* --------- specialising "amd64g_calculate_rflags_c" --------- */
1472 
1473    if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
1474       /* specialise calls to above "calculate_rflags_c" function */
1475       IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1476       vassert(arity == 4);
1477       cc_op   = args[0];
1478       cc_dep1 = args[1];
1479       cc_dep2 = args[2];
1480       cc_ndep = args[3];
1481 
1482       if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
1483          /* C after sub denotes unsigned less than */
1484          return unop(Iop_1Uto64,
1485                      binop(Iop_CmpLT64U,
1486                            cc_dep1,
1487                            cc_dep2));
1488       }
1489       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1490          /* C after sub denotes unsigned less than */
1491          return unop(Iop_1Uto64,
1492                      binop(Iop_CmpLT32U,
1493                            unop(Iop_64to32, cc_dep1),
1494                            unop(Iop_64to32, cc_dep2)));
1495       }
1496       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
1497          /* C after sub denotes unsigned less than */
1498          return unop(Iop_1Uto64,
1499                      binop(Iop_CmpLT64U,
1500                            binop(Iop_And64,cc_dep1,mkU64(0xFF)),
1501                            binop(Iop_And64,cc_dep2,mkU64(0xFF))));
1502       }
1503       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
1504           || isU64(cc_op, AMD64G_CC_OP_LOGICL)
1505           || isU64(cc_op, AMD64G_CC_OP_LOGICW)
1506           || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
1507          /* cflag after logic is zero */
1508          return mkU64(0);
1509       }
1510       if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
1511           || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
1512          /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
1513          return cc_ndep;
1514       }
1515 
1516 #     if 0
1517       if (cc_op->tag == Iex_Const) {
1518          vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
1519       }
1520 #     endif
1521 
1522       return NULL;
1523    }
1524 
1525 #  undef unop
1526 #  undef binop
1527 #  undef mkU64
1528 #  undef mkU32
1529 #  undef mkU8
1530 
1531    return NULL;
1532 }
1533 
1534 
1535 /*---------------------------------------------------------------*/
1536 /*--- Supporting functions for x87 FPU activities.            ---*/
1537 /*---------------------------------------------------------------*/
1538 
1539 static inline Bool host_is_little_endian ( void )
1540 {
1541    UInt x = 0x76543210;
1542    UChar* p = (UChar*)(&x);
1543    return toBool(*p == 0x10);
1544 }
1545 
1546 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
1547 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
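/* The returned condition-code bits classify the value: in (C3,C2,C0)
   terms, empty=101, zero=100, denormal=110, infinity=011, NaN=001 and
   normal finite=010, while C1 holds the sign bit in every case. */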
1548 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
1549 {
1550    Bool   mantissaIsZero;
1551    Int    bexp;
1552    UChar  sign;
1553    UChar* f64;
1554 
1555    vassert(host_is_little_endian());
1556 
1557    /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
1558 
1559    f64  = (UChar*)(&dbl);
1560    sign = toUChar( (f64[7] >> 7) & 1 );
1561 
1562    /* First off, if the tag indicates the register was empty,
1563       return 1,0,sign,1 */
1564    if (tag == 0) {
1565       /* vex_printf("Empty\n"); */
1566       return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
1567                                    | AMD64G_FC_MASK_C0;
1568    }
1569 
1570    bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
1571    bexp &= 0x7FF;
1572 
1573    mantissaIsZero
1574       = toBool(
1575            (f64[6] & 0x0F) == 0
1576            && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
1577         );
1578 
1579    /* If both exponent and mantissa are zero, the value is zero.
1580       Return 1,0,sign,0. */
1581    if (bexp == 0 && mantissaIsZero) {
1582       /* vex_printf("Zero\n"); */
1583       return AMD64G_FC_MASK_C3 | 0
1584                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1585    }
1586 
1587    /* If exponent is zero but mantissa isn't, it's a denormal.
1588       Return 1,1,sign,0. */
1589    if (bexp == 0 && !mantissaIsZero) {
1590       /* vex_printf("Denormal\n"); */
1591       return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
1592                                | (sign << AMD64G_FC_SHIFT_C1) | 0;
1593    }
1594 
1595    /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
1596       Return 0,1,sign,1. */
1597    if (bexp == 0x7FF && mantissaIsZero) {
1598       /* vex_printf("Inf\n"); */
1599       return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
1600                                    | AMD64G_FC_MASK_C0;
1601    }
1602 
1603    /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
1604       Return 0,0,sign,1. */
1605    if (bexp == 0x7FF && !mantissaIsZero) {
1606       /* vex_printf("NaN\n"); */
1607       return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
1608    }
1609 
1610    /* Uh, ok, we give up.  It must be a normal finite number.
1611       Return 0,1,sign,0.
1612    */
1613    /* vex_printf("normal\n"); */
1614    return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
1615 }
1616 
1617 
1618 /* This is used to implement both 'frstor' and 'fldenv'.  The latter
1619    appears to differ from the former only in that the 8 FP registers
1620    themselves are not transferred into the guest state. */
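/* The incoming image is laid out as an Fpu_State: a 14-halfword
   environment (control word, status word, tag word, etc) followed by
   the eight 10-byte registers in ST order. */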
1621 static
1622 VexEmNote do_put_x87 ( Bool moveRegs,
1623                        /*IN*/UChar* x87_state,
1624                        /*OUT*/VexGuestAMD64State* vex_state )
1625 {
1626    Int        stno, preg;
1627    UInt       tag;
1628    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1629    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1630    Fpu_State* x87     = (Fpu_State*)x87_state;
1631    UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;
1632    UInt       tagw    = x87->env[FP_ENV_TAG];
1633    UInt       fpucw   = x87->env[FP_ENV_CTRL];
1634    UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;
1635    VexEmNote  ew;
1636    UInt       fpround;
1637    ULong      pair;
1638 
1639    /* Copy registers and tags */
1640    for (stno = 0; stno < 8; stno++) {
1641       preg = (stno + ftop) & 7;
1642       tag = (tagw >> (2*preg)) & 3;
1643       if (tag == 3) {
1644          /* register is empty */
1645          /* hmm, if it's empty, does it still get written?  Probably
1646             safer to say it does.  If we don't, memcheck could get out
1647             of sync, in that it thinks all FP registers are defined by
1648             this helper, but in reality some have not been updated. */
1649          if (moveRegs)
1650             vexRegs[preg] = 0; /* IEEE754 64-bit zero */
1651          vexTags[preg] = 0;
1652       } else {
1653          /* register is non-empty */
1654          if (moveRegs)
1655             convert_f80le_to_f64le( &x87->reg[10*stno],
1656                                     (UChar*)&vexRegs[preg] );
1657          vexTags[preg] = 1;
1658       }
1659    }
1660 
1661    /* stack pointer */
1662    vex_state->guest_FTOP = ftop;
1663 
1664    /* status word */
1665    vex_state->guest_FC3210 = c3210;
1666 
1667    /* handle the control word, setting FPROUND and detecting any
1668       emulation warnings. */
1669    pair    = amd64g_check_fldcw ( (ULong)fpucw );
1670    fpround = (UInt)pair & 0xFFFFFFFFULL;
1671    ew      = (VexEmNote)(pair >> 32);
1672 
1673    vex_state->guest_FPROUND = fpround & 3;
1674 
1675    /* emulation warnings --> caller */
1676    return ew;
1677 }
1678 
1679 
1680 /* Create an x87 FPU state from the guest state, as close as
1681    we can approximate it. */
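/* Note that the guest state only records whether each register is
   empty or in use, so the tag word written here only ever contains
   the encodings 00 (valid) and 11 (empty). */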
1682 static
1683 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
1684                   /*OUT*/UChar* x87_state )
1685 {
1686    Int        i, stno, preg;
1687    UInt       tagw;
1688    ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
1689    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
1690    Fpu_State* x87     = (Fpu_State*)x87_state;
1691    UInt       ftop    = vex_state->guest_FTOP;
1692    UInt       c3210   = vex_state->guest_FC3210;
1693 
1694    for (i = 0; i < 14; i++)
1695       x87->env[i] = 0;
1696 
1697    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
1698    x87->env[FP_ENV_STAT]
1699       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
1700    x87->env[FP_ENV_CTRL]
1701       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
1702 
1703    /* Dump the register stack in ST order. */
1704    tagw = 0;
1705    for (stno = 0; stno < 8; stno++) {
1706       preg = (stno + ftop) & 7;
1707       if (vexTags[preg] == 0) {
1708          /* register is empty */
1709          tagw |= (3 << (2*preg));
1710          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1711                                  &x87->reg[10*stno] );
1712       } else {
1713          /* register is full. */
1714          tagw |= (0 << (2*preg));
1715          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
1716                                  &x87->reg[10*stno] );
1717       }
1718    }
1719    x87->env[FP_ENV_TAG] = toUShort(tagw);
1720 }
1721 
1722 
1723 /* CALLED FROM GENERATED CODE */
1724 /* DIRTY HELPER (reads guest state, writes guest mem) */
1725 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
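/* Image layout, as assembled below: bytes 0..31 hold FCW, FSW, the
   tag summary byte (FTW), FOP, RIP, RDP, MXCSR and MXCSR_MASK; bytes
   32..159 hold ST0..ST7 in 16-byte slots; bytes 160..415 hold
   %xmm0..%xmm15. */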
1726 void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
1727 {
1728    /* Derived from values obtained from
1729       vendor_id       : AuthenticAMD
1730       cpu family      : 15
1731       model           : 12
1732       model name      : AMD Athlon(tm) 64 Processor 3200+
1733       stepping        : 0
1734       cpu MHz         : 2200.000
1735       cache size      : 512 KB
1736    */
1737    /* Somewhat roundabout, but at least it's simple. */
1738    Fpu_State tmp;
1739    UShort*   addrS = (UShort*)addr;
1740    UChar*    addrC = (UChar*)addr;
1741    U128*     xmm   = (U128*)(addr + 160);
1742    UInt      mxcsr;
1743    UShort    fp_tags;
1744    UInt      summary_tags;
1745    Int       r, stno;
1746    UShort    *srcS, *dstS;
1747 
1748    do_get_x87( gst, (UChar*)&tmp );
1749    mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
1750 
1751    /* Now build the proper fxsave image from the x87 image we just
1752       made. */
1753 
1754    addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
1755    addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
1756 
1757    /* set addrS[2] in an endian-independent way */
1758    summary_tags = 0;
1759    fp_tags = tmp.env[FP_ENV_TAG];
1760    for (r = 0; r < 8; r++) {
1761       if ( ((fp_tags >> (2*r)) & 3) != 3 )
1762          summary_tags |= (1 << r);
1763    }
1764    addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
1765    addrC[5]  = 0; /* pad */
1766 
1767    /* FOP: faulting fpu opcode.  From experimentation, the real CPU
1768       does not write this field. (?!) */
1769    addrS[3]  = 0; /* BOGUS */
1770 
1771    /* RIP (Last x87 instruction pointer).  From experimentation, the
1772       real CPU does not write this field. (?!) */
1773    addrS[4]  = 0; /* BOGUS */
1774    addrS[5]  = 0; /* BOGUS */
1775    addrS[6]  = 0; /* BOGUS */
1776    addrS[7]  = 0; /* BOGUS */
1777 
1778    /* RDP (Last x87 data pointer).  From experimentation, the real CPU
1779       does not write this field. (?!) */
1780    addrS[8]  = 0; /* BOGUS */
1781    addrS[9]  = 0; /* BOGUS */
1782    addrS[10] = 0; /* BOGUS */
1783    addrS[11] = 0; /* BOGUS */
1784 
1785    addrS[12] = toUShort(mxcsr);  /* MXCSR */
1786    addrS[13] = toUShort(mxcsr >> 16);
1787 
1788    addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
1789    addrS[15] = 0x0000; /* MXCSR mask (hi16) */
1790 
1791    /* Copy in the FP registers, in ST order. */
1792    for (stno = 0; stno < 8; stno++) {
1793       srcS = (UShort*)(&tmp.reg[10*stno]);
1794       dstS = (UShort*)(&addrS[16 + 8*stno]);
1795       dstS[0] = srcS[0];
1796       dstS[1] = srcS[1];
1797       dstS[2] = srcS[2];
1798       dstS[3] = srcS[3];
1799       dstS[4] = srcS[4];
1800       dstS[5] = 0;
1801       dstS[6] = 0;
1802       dstS[7] = 0;
1803    }
1804 
1805    /* That's the first 160 bytes of the image done.  Now only %xmm0
1806       .. %xmm15 remain to be copied.  If the host is big-endian, these
1807       need to be byte-swapped. */
1808    vassert(host_is_little_endian());
1809 
1810 #  define COPY_U128(_dst,_src)                       \
1811       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1812            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1813       while (0)
1814 
1815    COPY_U128( xmm[0],  gst->guest_YMM0 );
1816    COPY_U128( xmm[1],  gst->guest_YMM1 );
1817    COPY_U128( xmm[2],  gst->guest_YMM2 );
1818    COPY_U128( xmm[3],  gst->guest_YMM3 );
1819    COPY_U128( xmm[4],  gst->guest_YMM4 );
1820    COPY_U128( xmm[5],  gst->guest_YMM5 );
1821    COPY_U128( xmm[6],  gst->guest_YMM6 );
1822    COPY_U128( xmm[7],  gst->guest_YMM7 );
1823    COPY_U128( xmm[8],  gst->guest_YMM8 );
1824    COPY_U128( xmm[9],  gst->guest_YMM9 );
1825    COPY_U128( xmm[10], gst->guest_YMM10 );
1826    COPY_U128( xmm[11], gst->guest_YMM11 );
1827    COPY_U128( xmm[12], gst->guest_YMM12 );
1828    COPY_U128( xmm[13], gst->guest_YMM13 );
1829    COPY_U128( xmm[14], gst->guest_YMM14 );
1830    COPY_U128( xmm[15], gst->guest_YMM15 );
1831 
1832 #  undef COPY_U128
1833 }
1834 
1835 
1836 /* CALLED FROM GENERATED CODE */
1837 /* DIRTY HELPER (writes guest state, reads guest mem) */
1838 VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
1839 {
1840    Fpu_State tmp;
1841    VexEmNote warnX87 = EmNote_NONE;
1842    VexEmNote warnXMM = EmNote_NONE;
1843    UShort*   addrS   = (UShort*)addr;
1844    UChar*    addrC   = (UChar*)addr;
1845    U128*     xmm     = (U128*)(addr + 160);
1846    UShort    fp_tags;
1847    Int       r, stno, i;
1848 
1849    /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
1850       to be byte-swapped. */
1851    vassert(host_is_little_endian());
1852 
1853 #  define COPY_U128(_dst,_src)                       \
1854       do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
1855            _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
1856       while (0)
1857 
1858    COPY_U128( gst->guest_YMM0, xmm[0] );
1859    COPY_U128( gst->guest_YMM1, xmm[1] );
1860    COPY_U128( gst->guest_YMM2, xmm[2] );
1861    COPY_U128( gst->guest_YMM3, xmm[3] );
1862    COPY_U128( gst->guest_YMM4, xmm[4] );
1863    COPY_U128( gst->guest_YMM5, xmm[5] );
1864    COPY_U128( gst->guest_YMM6, xmm[6] );
1865    COPY_U128( gst->guest_YMM7, xmm[7] );
1866    COPY_U128( gst->guest_YMM8, xmm[8] );
1867    COPY_U128( gst->guest_YMM9, xmm[9] );
1868    COPY_U128( gst->guest_YMM10, xmm[10] );
1869    COPY_U128( gst->guest_YMM11, xmm[11] );
1870    COPY_U128( gst->guest_YMM12, xmm[12] );
1871    COPY_U128( gst->guest_YMM13, xmm[13] );
1872    COPY_U128( gst->guest_YMM14, xmm[14] );
1873    COPY_U128( gst->guest_YMM15, xmm[15] );
1874 
1875 #  undef COPY_U128
1876 
1877    /* Copy the x87 registers out of the image, into a temporary
1878       Fpu_State struct. */
1879    for (i = 0; i < 14; i++) tmp.env[i] = 0;
1880    for (i = 0; i < 80; i++) tmp.reg[i] = 0;
1881    /* fill in tmp.reg[0..7] */
1882    for (stno = 0; stno < 8; stno++) {
1883       UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
1884       UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
1885       dstS[0] = srcS[0];
1886       dstS[1] = srcS[1];
1887       dstS[2] = srcS[2];
1888       dstS[3] = srcS[3];
1889       dstS[4] = srcS[4];
1890    }
1891    /* fill in tmp.env[0..13] */
1892    tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
1893    tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
1894 
1895    fp_tags = 0;
1896    for (r = 0; r < 8; r++) {
1897       if (addrC[4] & (1<<r))
1898          fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
1899       else
1900          fp_tags |= (3 << (2*r)); /* EMPTY */
1901    }
1902    tmp.env[FP_ENV_TAG] = fp_tags;
1903 
1904    /* Now write 'tmp' into the guest state. */
1905    warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
1906 
1907    { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
1908                 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
1909      ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
1910 
1911      warnXMM = (VexEmNote)(w64 >> 32);
1912 
1913      gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
1914    }
1915 
1916    /* Prefer an X87 emwarn over an XMM one, if both exist. */
1917    if (warnX87 != EmNote_NONE)
1918       return warnX87;
1919    else
1920       return warnXMM;
1921 }
1922 
1923 
1924 /* DIRTY HELPER (writes guest state) */
1925 /* Initialise the x87 FPU state as per 'finit'. */
1926 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
1927 {
1928    Int i;
1929    gst->guest_FTOP = 0;
1930    for (i = 0; i < 8; i++) {
1931       gst->guest_FPTAG[i] = 0; /* empty */
1932       gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
1933    }
1934    gst->guest_FPROUND = (ULong)Irrm_NEAREST;
1935    gst->guest_FC3210  = 0;
1936 }
1937 
1938 
1939 /* CALLED FROM GENERATED CODE */
1940 /* DIRTY HELPER (reads guest memory) */
1941 ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
1942 {
1943    ULong f64;
1944    convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
1945    return f64;
1946 }
1947 
1948 /* CALLED FROM GENERATED CODE */
1949 /* DIRTY HELPER (writes guest memory) */
1950 void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
1951 {
1952    convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
1953 }
1954 
1955 
1956 /* CALLED FROM GENERATED CODE */
1957 /* CLEAN HELPER */
1958 /* mxcsr[15:0] contains an SSE native format MXCSR value.
1959    Extract from it the required SSEROUND value and any resulting
1960    emulation warning, and return (warn << 32) | sseround value.
1961 */
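/* For instance, the power-on default MXCSR value 0x1F80 has all
   exception bits masked and RC = 00, so it yields rounding mode
   Irrm_NEAREST and no warning, i.e. a return value of 0. */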
1962 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
1963 {
1964    /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
1965    /* NOTE, encoded exactly as per enum IRRoundingMode. */
1966    ULong rmode = (mxcsr >> 13) & 3;
1967 
1968    /* Detect any required emulation warnings. */
1969    VexEmNote ew = EmNote_NONE;
1970 
1971    if ((mxcsr & 0x1F80) != 0x1F80) {
1972       /* unmasked exceptions! */
1973       ew = EmWarn_X86_sseExns;
1974    }
1975    else
1976    if (mxcsr & (1<<15)) {
1977       /* FZ is set */
1978       ew = EmWarn_X86_fz;
1979    }
1980    else
1981    if (mxcsr & (1<<6)) {
1982       /* DAZ is set */
1983       ew = EmWarn_X86_daz;
1984    }
1985 
1986    return (((ULong)ew) << 32) | ((ULong)rmode);
1987 }
1988 
1989 
1990 /* CALLED FROM GENERATED CODE */
1991 /* CLEAN HELPER */
1992 /* Given sseround as an IRRoundingMode value, create a suitable SSE
1993    native format MXCSR value. */
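/* e.g. Irrm_ZERO (3) maps to 0x7F80: all exceptions masked, RC = 11. */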
1994 ULong amd64g_create_mxcsr ( ULong sseround )
1995 {
1996    sseround &= 3;
1997    return 0x1F80 | (sseround << 13);
1998 }
1999 
2000 
2001 /* CLEAN HELPER */
2002 /* fpucw[15:0] contains an x87 native format FPU control word.
2003    Extract from it the required FPROUND value and any resulting
2004    emulation warning, and return (warn << 32) | fpround value.
2005 */
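/* For instance, the 'finit' default control word 0x037F has all
   exceptions masked, 64-bit precision and RC = 00, so it yields
   Irrm_NEAREST and no warning, i.e. a return value of 0. */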
2006 ULong amd64g_check_fldcw ( ULong fpucw )
2007 {
2008    /* Decide on a rounding mode.  fpucw[11:10] holds it. */
2009    /* NOTE, encoded exactly as per enum IRRoundingMode. */
2010    ULong rmode = (fpucw >> 10) & 3;
2011 
2012    /* Detect any required emulation warnings. */
2013    VexEmNote ew = EmNote_NONE;
2014 
2015    if ((fpucw & 0x3F) != 0x3F) {
2016       /* unmasked exceptions! */
2017       ew = EmWarn_X86_x87exns;
2018    }
2019    else
2020    if (((fpucw >> 8) & 3) != 3) {
2021       /* unsupported precision */
2022       ew = EmWarn_X86_x87precision;
2023    }
2024 
2025    return (((ULong)ew) << 32) | ((ULong)rmode);
2026 }
2027 
2028 
2029 /* CLEAN HELPER */
2030 /* Given fpround as an IRRoundingMode value, create a suitable x87
2031    native format FPU control word. */
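/* e.g. Irrm_NEAREST (0) yields 0x037F, the same control word that
   'finit' establishes. */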
2032 ULong amd64g_create_fpucw ( ULong fpround )
2033 {
2034    fpround &= 3;
2035    return 0x037F | (fpround << 10);
2036 }
2037 
2038 
2039 /* This is used to implement 'fldenv'.
2040    Reads 28 bytes at x87_state[0 .. 27]. */
2041 /* CALLED FROM GENERATED CODE */
2042 /* DIRTY HELPER */
2043 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2044                                       /*IN*/HWord x87_state)
2045 {
2046    return do_put_x87( False, (UChar*)x87_state, vex_state );
2047 }
2048 
2049 
2050 /* CALLED FROM GENERATED CODE */
2051 /* DIRTY HELPER */
2052 /* Create an x87 FPU env from the guest state, as close as we can
2053    approximate it.  Writes 28 bytes at x87_state[0..27]. */
2054 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2055                                  /*OUT*/HWord x87_state )
2056 {
2057    Int        i, stno, preg;
2058    UInt       tagw;
2059    UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2060    Fpu_State* x87     = (Fpu_State*)x87_state;
2061    UInt       ftop    = vex_state->guest_FTOP;
2062    ULong      c3210   = vex_state->guest_FC3210;
2063 
2064    for (i = 0; i < 14; i++)
2065       x87->env[i] = 0;
2066 
2067    x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2068    x87->env[FP_ENV_STAT]
2069       = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2070    x87->env[FP_ENV_CTRL]
2071       = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2072 
2073    /* Compute the x87 tag word. */
2074    tagw = 0;
2075    for (stno = 0; stno < 8; stno++) {
2076       preg = (stno + ftop) & 7;
2077       if (vexTags[preg] == 0) {
2078          /* register is empty */
2079          tagw |= (3 << (2*preg));
2080       } else {
2081          /* register is full. */
2082          tagw |= (0 << (2*preg));
2083       }
2084    }
2085    x87->env[FP_ENV_TAG] = toUShort(tagw);
2086 
2087    /* We don't dump the x87 registers, though. */
2088 }
2089 
2090 
2091 /* This is used to implement 'fnsave'.
2092    Writes 108 bytes at x87_state[0 .. 107]. */
2093 /* CALLED FROM GENERATED CODE */
2094 /* DIRTY HELPER */
2095 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2096                                  /*OUT*/HWord x87_state)
2097 {
2098    do_get_x87( vex_state, (UChar*)x87_state );
2099 }
2100 
2101 
2102 /* This is used to implement 'fnsaves'.
2103    Writes 94 bytes at x87_state[0 .. 93]. */
2104 /* CALLED FROM GENERATED CODE */
2105 /* DIRTY HELPER */
2106 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2107                                   /*OUT*/HWord x87_state)
2108 {
2109    Int           i, stno, preg;
2110    UInt          tagw;
2111    ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2112    UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2113    Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
2114    UInt          ftop    = vex_state->guest_FTOP;
2115    UInt          c3210   = vex_state->guest_FC3210;
2116 
2117    for (i = 0; i < 7; i++)
2118       x87->env[i] = 0;
2119 
2120    x87->env[FPS_ENV_STAT]
2121       = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2122    x87->env[FPS_ENV_CTRL]
2123       = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2124 
2125    /* Dump the register stack in ST order. */
2126    tagw = 0;
2127    for (stno = 0; stno < 8; stno++) {
2128       preg = (stno + ftop) & 7;
2129       if (vexTags[preg] == 0) {
2130          /* register is empty */
2131          tagw |= (3 << (2*preg));
2132          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2133                                  &x87->reg[10*stno] );
2134       } else {
2135          /* register is full. */
2136          tagw |= (0 << (2*preg));
2137          convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2138                                  &x87->reg[10*stno] );
2139       }
2140    }
2141    x87->env[FPS_ENV_TAG] = toUShort(tagw);
2142 }
2143 
2144 
2145 /* This is used to implement 'frstor'.
2146    Reads 108 bytes at x87_state[0 .. 107]. */
2147 /* CALLED FROM GENERATED CODE */
2148 /* DIRTY HELPER */
2149 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2150                                       /*IN*/HWord x87_state)
2151 {
2152    return do_put_x87( True, (UChar*)x87_state, vex_state );
2153 }
2154 
2155 
2156 /* This is used to implement 'frstors'.
2157    Reads 94 bytes at x87_state[0 .. 93]. */
2158 /* CALLED FROM GENERATED CODE */
2159 /* DIRTY HELPER */
2160 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2161                                        /*IN*/HWord x87_state)
2162 {
2163    Int           stno, preg;
2164    UInt          tag;
2165    ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2166    UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2167    Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
2168    UInt          ftop    = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2169    UInt          tagw    = x87->env[FPS_ENV_TAG];
2170    UInt          fpucw   = x87->env[FPS_ENV_CTRL];
2171    UInt          c3210   = x87->env[FPS_ENV_STAT] & 0x4700;
2172    VexEmNote     ew;
2173    UInt          fpround;
2174    ULong         pair;
2175 
2176    /* Copy registers and tags */
2177    for (stno = 0; stno < 8; stno++) {
2178       preg = (stno + ftop) & 7;
2179       tag = (tagw >> (2*preg)) & 3;
2180       if (tag == 3) {
2181          /* register is empty */
2182          /* hmm, if it's empty, does it still get written?  Probably
2183             safer to say it does.  If we don't, memcheck could get out
2184             of sync, in that it thinks all FP registers are defined by
2185             this helper, but in reality some have not been updated. */
2186          vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2187          vexTags[preg] = 0;
2188       } else {
2189          /* register is non-empty */
2190          convert_f80le_to_f64le( &x87->reg[10*stno],
2191                                  (UChar*)&vexRegs[preg] );
2192          vexTags[preg] = 1;
2193       }
2194    }
2195 
2196    /* stack pointer */
2197    vex_state->guest_FTOP = ftop;
2198 
2199    /* status word */
2200    vex_state->guest_FC3210 = c3210;
2201 
2202    /* handle the control word, setting FPROUND and detecting any
2203       emulation warnings. */
2204    pair    = amd64g_check_fldcw ( (ULong)fpucw );
2205    fpround = (UInt)pair & 0xFFFFFFFFULL;
2206    ew      = (VexEmNote)(pair >> 32);
2207 
2208    vex_state->guest_FPROUND = fpround & 3;
2209 
2210    /* emulation warnings --> caller */
2211    return ew;
2212 }
2213 
2214 
2215 /*---------------------------------------------------------------*/
2216 /*--- Misc integer helpers, including rotates and CPUID.      ---*/
2217 /*---------------------------------------------------------------*/
2218 
2219 /* Claim to be the following CPU, which is probably representative of
2220    the lowliest (earliest) amd64 offerings.  It supports neither sse3
2221    nor cx16.
2222 
2223    vendor_id       : AuthenticAMD
2224    cpu family      : 15
2225    model           : 5
2226    model name      : AMD Opteron (tm) Processor 848
2227    stepping        : 10
2228    cpu MHz         : 1797.682
2229    cache size      : 1024 KB
2230    fpu             : yes
2231    fpu_exception   : yes
2232    cpuid level     : 1
2233    wp              : yes
2234    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2235                      mtrr pge mca cmov pat pse36 clflush mmx fxsr
2236                      sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2237    bogomips        : 3600.62
2238    TLB size        : 1088 4K pages
2239    clflush size    : 64
2240    cache_alignment : 64
2241    address sizes   : 40 bits physical, 48 bits virtual
2242    power management: ts fid vid ttp
2243 
2244    2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2245    we don't support them.  See #291568.  3dnow is 80000001.EDX.31
2246    and 3dnowext is 80000001.EDX.30.
2247 */
2248 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2249 {
2250 #  define SET_ABCD(_a,_b,_c,_d)                \
2251       do { st->guest_RAX = (ULong)(_a);        \
2252            st->guest_RBX = (ULong)(_b);        \
2253            st->guest_RCX = (ULong)(_c);        \
2254            st->guest_RDX = (ULong)(_d);        \
2255       } while (0)
2256 
2257    switch (0xFFFFFFFF & st->guest_RAX) {
2258       case 0x00000000:
2259          SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2260          break;
2261       case 0x00000001:
2262          SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
2263          break;
2264       case 0x80000000:
2265          SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
2266          break;
2267       case 0x80000001:
2268          /* Don't claim to support 3dnow or 3dnowext.  0xe1d3fbff is
2269             the original it-is-supported value that the h/w provides.
2270             See #291568. */
2271          SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
2272                                                       0x21d3fbff);
2273          break;
2274       case 0x80000002:
2275          SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
2276          break;
2277       case 0x80000003:
2278          SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
2279          break;
2280       case 0x80000004:
2281          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2282          break;
2283       case 0x80000005:
2284          SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
2285          break;
2286       case 0x80000006:
2287          SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
2288          break;
2289       case 0x80000007:
2290          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
2291          break;
2292       case 0x80000008:
2293          SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
2294          break;
2295       default:
2296          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2297          break;
2298    }
2299 #  undef SET_ABCD
2300 }
2301 
2302 
2303 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
2304    capable.
2305 
2306    vendor_id       : GenuineIntel
2307    cpu family      : 6
2308    model           : 15
2309    model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
2310    stepping        : 6
2311    cpu MHz         : 2394.000
2312    cache size      : 4096 KB
2313    physical id     : 0
2314    siblings        : 2
2315    core id         : 0
2316    cpu cores       : 2
2317    fpu             : yes
2318    fpu_exception   : yes
2319    cpuid level     : 10
2320    wp              : yes
2321    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2322                      mtrr pge mca cmov pat pse36 clflush dts acpi
2323                      mmx fxsr sse sse2 ss ht tm syscall nx lm
2324                      constant_tsc pni monitor ds_cpl vmx est tm2
2325                      cx16 xtpr lahf_lm
2326    bogomips        : 4798.78
2327    clflush size    : 64
2328    cache_alignment : 64
2329    address sizes   : 36 bits physical, 48 bits virtual
2330    power management:
2331 */
2332 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
2333 {
2334 #  define SET_ABCD(_a,_b,_c,_d)                \
2335       do { st->guest_RAX = (ULong)(_a);        \
2336            st->guest_RBX = (ULong)(_b);        \
2337            st->guest_RCX = (ULong)(_c);        \
2338            st->guest_RDX = (ULong)(_d);        \
2339       } while (0)
2340 
2341    switch (0xFFFFFFFF & st->guest_RAX) {
2342       case 0x00000000:
2343          SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
2344          break;
2345       case 0x00000001:
2346          SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
2347          break;
2348       case 0x00000002:
2349          SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
2350          break;
2351       case 0x00000003:
2352          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2353          break;
2354       case 0x00000004: {
2355          switch (0xFFFFFFFF & st->guest_RCX) {
2356             case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
2357                                       0x0000003f, 0x00000001); break;
2358             case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
2359                                       0x0000003f, 0x00000001); break;
2360             case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
2361                                       0x00000fff, 0x00000001); break;
2362             default:         SET_ABCD(0x00000000, 0x00000000,
2363                                       0x00000000, 0x00000000); break;
2364          }
2365          break;
2366       }
2367       case 0x00000005:
2368          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
2369          break;
2370       case 0x00000006:
2371          SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
2372          break;
2373       case 0x00000007:
2374          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2375          break;
2376       case 0x00000008:
2377          SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
2378          break;
2379       case 0x00000009:
2380          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2381          break;
2382       case 0x0000000a:
2383       unhandled_eax_value:
2384          SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
2385          break;
2386       case 0x80000000:
2387          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2388          break;
2389       case 0x80000001:
2390          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
2391          break;
2392       case 0x80000002:
2393          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2394          break;
2395       case 0x80000003:
2396          SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
2397          break;
2398       case 0x80000004:
2399          SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
2400          break;
2401       case 0x80000005:
2402          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2403          break;
2404       case 0x80000006:
2405          SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
2406          break;
2407       case 0x80000007:
2408          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2409          break;
2410       case 0x80000008:
2411          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2412          break;
2413       default:
2414          goto unhandled_eax_value;
2415    }
2416 #  undef SET_ABCD
2417 }
2418 
2419 
2420 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
2421    capable.
2422 
2423    vendor_id       : GenuineIntel
2424    cpu family      : 6
2425    model           : 37
2426    model name      : Intel(R) Core(TM) i5 CPU         670  @ 3.47GHz
2427    stepping        : 2
2428    cpu MHz         : 3334.000
2429    cache size      : 4096 KB
2430    physical id     : 0
2431    siblings        : 4
2432    core id         : 0
2433    cpu cores       : 2
2434    apicid          : 0
2435    initial apicid  : 0
2436    fpu             : yes
2437    fpu_exception   : yes
2438    cpuid level     : 11
2439    wp              : yes
2440    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2441                      mtrr pge mca cmov pat pse36 clflush dts acpi
2442                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2443                      lm constant_tsc arch_perfmon pebs bts rep_good
2444                      xtopology nonstop_tsc aperfmperf pni pclmulqdq
2445                      dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
2446                      xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
2447                      arat tpr_shadow vnmi flexpriority ept vpid
2448    bogomips        : 6957.57
2449    clflush size    : 64
2450    cache_alignment : 64
2451    address sizes   : 36 bits physical, 48 bits virtual
2452    power management:
2453 */
2454 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
2455 {
2456 #  define SET_ABCD(_a,_b,_c,_d)                \
2457       do { st->guest_RAX = (ULong)(_a);        \
2458            st->guest_RBX = (ULong)(_b);        \
2459            st->guest_RCX = (ULong)(_c);        \
2460            st->guest_RDX = (ULong)(_d);        \
2461       } while (0)
2462 
2463    UInt old_eax = (UInt)st->guest_RAX;
2464    UInt old_ecx = (UInt)st->guest_RCX;
2465 
2466    switch (old_eax) {
2467       case 0x00000000:
2468          SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
2469          break;
2470       case 0x00000001:
2471          SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
2472          break;
2473       case 0x00000002:
2474          SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
2475          break;
2476       case 0x00000003:
2477          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2478          break;
2479       case 0x00000004:
2480          switch (old_ecx) {
2481             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2482                                       0x0000003f, 0x00000000); break;
2483             case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
2484                                       0x0000007f, 0x00000000); break;
2485             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2486                                       0x000001ff, 0x00000000); break;
2487             case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
2488                                       0x00000fff, 0x00000002); break;
2489             default:         SET_ABCD(0x00000000, 0x00000000,
2490                                       0x00000000, 0x00000000); break;
2491          }
2492          break;
2493       case 0x00000005:
2494          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2495          break;
2496       case 0x00000006:
2497          SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
2498          break;
2499       case 0x00000007:
2500          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2501          break;
2502       case 0x00000008:
2503          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2504          break;
2505       case 0x00000009:
2506          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2507          break;
2508       case 0x0000000a:
2509          SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
2510          break;
2511       case 0x0000000b:
2512          switch (old_ecx) {
2513             case 0x00000000:
2514                SET_ABCD(0x00000001, 0x00000002,
2515                         0x00000100, 0x00000000); break;
2516             case 0x00000001:
2517                SET_ABCD(0x00000004, 0x00000004,
2518                         0x00000201, 0x00000000); break;
2519             default:
2520                SET_ABCD(0x00000000, 0x00000000,
2521                         old_ecx,    0x00000000); break;
2522          }
2523          break;
2524       case 0x0000000c:
2525          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2526          break;
2527       case 0x0000000d:
2528          switch (old_ecx) {
2529             case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
2530                                       0x00000100, 0x00000000); break;
2531             case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
2532                                       0x00000201, 0x00000000); break;
2533             default:         SET_ABCD(0x00000000, 0x00000000,
2534                                       old_ecx,    0x00000000); break;
2535          }
2536          break;
2537       case 0x80000000:
2538          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2539          break;
2540       case 0x80000001:
2541          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2542          break;
2543       case 0x80000002:
2544          SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
2545          break;
2546       case 0x80000003:
2547          SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
2548          break;
2549       case 0x80000004:
2550          SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
2551          break;
2552       case 0x80000005:
2553          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2554          break;
2555       case 0x80000006:
2556          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2557          break;
2558       case 0x80000007:
2559          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2560          break;
2561       case 0x80000008:
2562          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2563          break;
2564       default:
2565          SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
2566          break;
2567    }
2568 #  undef SET_ABCD
2569 }
2570 
2571 
2572 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
2573    capable.  Plus (kludge!) it "supports" HTM.
2574 
2575    vendor_id       : GenuineIntel
2576    cpu family      : 6
2577    model           : 42
2578    model name      : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
2579    stepping        : 7
2580    cpu MHz         : 1600.000
2581    cache size      : 6144 KB
2582    physical id     : 0
2583    siblings        : 4
2584    core id         : 3
2585    cpu cores       : 4
2586    apicid          : 6
2587    initial apicid  : 6
2588    fpu             : yes
2589    fpu_exception   : yes
2590    cpuid level     : 13
2591    wp              : yes
2592    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
2593                      mtrr pge mca cmov pat pse36 clflush dts acpi
2594                      mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
2595                      lm constant_tsc arch_perfmon pebs bts rep_good
2596                      nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
2597                      dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
2598                      xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
2599                      lahf_lm ida arat epb xsaveopt pln pts dts
2600                      tpr_shadow vnmi flexpriority ept vpid
2601 
2602    bogomips        : 5768.94
2603    clflush size    : 64
2604    cache_alignment : 64
2605    address sizes   : 36 bits physical, 48 bits virtual
2606    power management:
2607 */
2608 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st )
2609 {
2610 #  define SET_ABCD(_a,_b,_c,_d)                \
2611       do { st->guest_RAX = (ULong)(_a);        \
2612            st->guest_RBX = (ULong)(_b);        \
2613            st->guest_RCX = (ULong)(_c);        \
2614            st->guest_RDX = (ULong)(_d);        \
2615       } while (0)
2616 
2617    UInt old_eax = (UInt)st->guest_RAX;
2618    UInt old_ecx = (UInt)st->guest_RCX;
2619 
2620    switch (old_eax) {
2621       case 0x00000000:
2622          SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
2623          break;
2624       case 0x00000001:
2625          SET_ABCD(0x000206a7, 0x00100800, 0x1f9ae3bf, 0xbfebfbff);
2626          break;
2627       case 0x00000002:
2628          SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
2629          break;
2630       case 0x00000003:
2631          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2632          break;
2633       case 0x00000004:
2634          switch (old_ecx) {
2635             case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
2636                                       0x0000003f, 0x00000000); break;
2637             case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
2638                                       0x0000003f, 0x00000000); break;
2639             case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
2640                                       0x000001ff, 0x00000000); break;
2641             case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
2642                                       0x00001fff, 0x00000006); break;
2643             default:         SET_ABCD(0x00000000, 0x00000000,
2644                                       0x00000000, 0x00000000); break;
2645          }
2646          break;
2647       case 0x00000005:
2648          SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
2649          break;
2650       case 0x00000006:
2651          SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
2652          break;
2653       case 0x00000007:
2654          SET_ABCD(0x00000000, 0x00000800, 0x00000000, 0x00000000);
2655          break;
2656       case 0x00000008:
2657          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2658          break;
2659       case 0x00000009:
2660          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2661          break;
2662       case 0x0000000a:
2663          SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
2664          break;
2665       case 0x0000000b:
2666          switch (old_ecx) {
2667             case 0x00000000:
2668                SET_ABCD(0x00000001, 0x00000001,
2669                         0x00000100, 0x00000000); break;
2670             case 0x00000001:
2671                SET_ABCD(0x00000004, 0x00000004,
2672                         0x00000201, 0x00000000); break;
2673             default:
2674                SET_ABCD(0x00000000, 0x00000000,
2675                         old_ecx,    0x00000000); break;
2676          }
2677          break;
2678       case 0x0000000c:
2679          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2680          break;
2681       case 0x0000000d:
2682          switch (old_ecx) {
2683             case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
2684                                       0x00000340, 0x00000000); break;
2685             case 0x00000001: SET_ABCD(0x00000001, 0x00000000,
2686                                       0x00000000, 0x00000000); break;
2687             case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
2688                                       0x00000000, 0x00000000); break;
2689             default:         SET_ABCD(0x00000000, 0x00000000,
2690                                       0x00000000, 0x00000000); break;
2691          }
2692          break;
2693       case 0x0000000e:
2694          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2695          break;
2696       case 0x0000000f:
2697          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2698          break;
2699       case 0x80000000:
2700          SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
2701          break;
2702       case 0x80000001:
2703          SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
2704          break;
2705       case 0x80000002:
2706          SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
2707          break;
2708       case 0x80000003:
2709          SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
2710          break;
2711       case 0x80000004:
2712          SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
2713          break;
2714       case 0x80000005:
2715          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
2716          break;
2717       case 0x80000006:
2718          SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
2719          break;
2720       case 0x80000007:
2721          SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
2722          break;
2723       case 0x80000008:
2724          SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
2725          break;
2726       default:
2727          SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
2728          break;
2729    }
2730 #  undef SET_ABCD
2731 }
2732 
2733 
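/* Rotate right through the carry flag, for RCR.  szIN gives the
   operand size in bytes; a negative szIN means "use |szIN| as the
   size and return the updated rflags rather than the rotated value".
   The rotate amount is masked to 6 bits for 8-byte operands and to 5
   bits otherwise, and for the narrower sizes is then reduced modulo
   size*8 + 1, since rotating through CF cycles over size*8 + 1 bit
   positions. */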
2734 ULong amd64g_calculate_RCR ( ULong arg,
2735                              ULong rot_amt,
2736                              ULong rflags_in,
2737                              Long  szIN )
2738 {
2739    Bool  wantRflags = toBool(szIN < 0);
2740    ULong sz         = wantRflags ? (-szIN) : szIN;
2741    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2742    ULong cf=0, of=0, tempcf;
2743 
2744    switch (sz) {
2745       case 8:
2746          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2747          of        = ((arg >> 63) ^ cf) & 1;
2748          while (tempCOUNT > 0) {
2749             tempcf = arg & 1;
2750             arg    = (arg >> 1) | (cf << 63);
2751             cf     = tempcf;
2752             tempCOUNT--;
2753          }
2754          break;
2755       case 4:
2756          while (tempCOUNT >= 33) tempCOUNT -= 33;
2757          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2758          of        = ((arg >> 31) ^ cf) & 1;
2759          while (tempCOUNT > 0) {
2760             tempcf = arg & 1;
2761             arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
2762             cf     = tempcf;
2763             tempCOUNT--;
2764          }
2765          break;
2766       case 2:
2767          while (tempCOUNT >= 17) tempCOUNT -= 17;
2768          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2769          of        = ((arg >> 15) ^ cf) & 1;
2770          while (tempCOUNT > 0) {
2771             tempcf = arg & 1;
2772             arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
2773             cf     = tempcf;
2774             tempCOUNT--;
2775          }
2776          break;
2777       case 1:
2778          while (tempCOUNT >= 9) tempCOUNT -= 9;
2779          cf        = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2780          of        = ((arg >> 7) ^ cf) & 1;
2781          while (tempCOUNT > 0) {
2782             tempcf = arg & 1;
2783             arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
2784             cf     = tempcf;
2785             tempCOUNT--;
2786          }
2787          break;
2788       default:
2789          vpanic("calculate_RCR(amd64g): invalid size");
2790    }
2791 
2792    cf &= 1;
2793    of &= 1;
2794    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2795    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2796 
2797    /* caller can ask to have back either the resulting flags or
2798       resulting value, but not both */
2799    return wantRflags ? rflags_in : arg;
2800 }
2801 
2802 ULong amd64g_calculate_RCL ( ULong arg,
2803                              ULong rot_amt,
2804                              ULong rflags_in,
2805                              Long  szIN )
2806 {
2807    Bool  wantRflags = toBool(szIN < 0);
2808    ULong sz         = wantRflags ? (-szIN) : szIN;
2809    ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
2810    ULong cf=0, of=0, tempcf;
2811 
2812    switch (sz) {
2813       case 8:
2814          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2815          while (tempCOUNT > 0) {
2816             tempcf = (arg >> 63) & 1;
2817             arg    = (arg << 1) | (cf & 1);
2818             cf     = tempcf;
2819             tempCOUNT--;
2820          }
2821          of = ((arg >> 63) ^ cf) & 1;
2822          break;
2823       case 4:
2824          while (tempCOUNT >= 33) tempCOUNT -= 33;
2825          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2826          while (tempCOUNT > 0) {
2827             tempcf = (arg >> 31) & 1;
2828             arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
2829             cf     = tempcf;
2830             tempCOUNT--;
2831          }
2832          of = ((arg >> 31) ^ cf) & 1;
2833          break;
2834       case 2:
2835          while (tempCOUNT >= 17) tempCOUNT -= 17;
2836          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2837          while (tempCOUNT > 0) {
2838             tempcf = (arg >> 15) & 1;
2839             arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
2840             cf     = tempcf;
2841             tempCOUNT--;
2842          }
2843          of = ((arg >> 15) ^ cf) & 1;
2844          break;
2845       case 1:
2846          while (tempCOUNT >= 9) tempCOUNT -= 9;
2847          cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
2848          while (tempCOUNT > 0) {
2849             tempcf = (arg >> 7) & 1;
2850             arg    = 0xFFULL & ((arg << 1) | (cf & 1));
2851             cf     = tempcf;
2852             tempCOUNT--;
2853          }
2854          of = ((arg >> 7) ^ cf) & 1;
2855          break;
2856       default:
2857          vpanic("calculate_RCL(amd64g): invalid size");
2858    }
2859 
2860    cf &= 1;
2861    of &= 1;
2862    rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
2863    rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
2864 
2865    return wantRflags ? rflags_in : arg;
2866 }
2867 
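/* Editorial sketch (not compiled): how a caller can use the szIN sign
   convention of the two rotate-through-carry helpers above.  A positive
   size asks for the rotated value; the negated size asks for the updated
   %rflags instead.  The wrapper name here is hypothetical. */
#if 0
static void example_rcr32 ( ULong val, ULong amt, ULong rflags_in,
                            /*OUT*/ULong* res, /*OUT*/ULong* rflags_out )
{
   *res        = amd64g_calculate_RCR(val, amt, rflags_in,  4);
   *rflags_out = amd64g_calculate_RCR(val, amt, rflags_in, -4);
}
#endif
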
2868 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
2869  * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
2870  */
2871 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
2872 {
2873    ULong hi, lo, tmp, A[16];
2874 
2875    A[0] = 0;            A[1] = a;
2876    A[2] = A[1] << 1;    A[3] = A[2] ^ a;
2877    A[4] = A[2] << 1;    A[5] = A[4] ^ a;
2878    A[6] = A[3] << 1;    A[7] = A[6] ^ a;
2879    A[8] = A[4] << 1;    A[9] = A[8] ^ a;
2880    A[10] = A[5] << 1;   A[11] = A[10] ^ a;
2881    A[12] = A[6] << 1;   A[13] = A[12] ^ a;
2882    A[14] = A[7] << 1;   A[15] = A[14] ^ a;
2883 
2884    lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
2885    hi = lo >> 56;
2886    lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
2887    hi = (hi << 8) | (lo >> 56);
2888    lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
2889    hi = (hi << 8) | (lo >> 56);
2890    lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
2891    hi = (hi << 8) | (lo >> 56);
2892    lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
2893    hi = (hi << 8) | (lo >> 56);
2894    lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
2895    hi = (hi << 8) | (lo >> 56);
2896    lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
2897    hi = (hi << 8) | (lo >> 56);
2898    lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
2899 
2900    ULong m0 = -1;
2901    m0 /= 255;
2902    tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
2903    tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
2904    tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
2905    tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
2906    tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
2907    tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
2908    tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
2909 
2910    return which ? hi : lo;
2911 }
2912 
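/* Editorial sketch (not compiled): a bit-serial carry-less multiply that
   produces the same 128-bit GF(2) product as the table-driven routine
   above; 'which' selects the high or low 64-bit half, as in
   amd64g_calculate_pclmul. */
#if 0
static ULong ref_pclmul ( ULong a, ULong b, ULong which )
{
   ULong hi = 0, lo = 0;
   UInt  i;
   for (i = 0; i < 64; i++) {
      if ((b >> i) & 1) {
         lo ^= a << i;
         if (i > 0)
            hi ^= a >> (64 - i);
      }
   }
   return which ? hi : lo;
}
#endif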
2913 
2914 /* CALLED FROM GENERATED CODE */
2915 /* DIRTY HELPER (non-referentially-transparent) */
2916 /* Horrible hack.  On non-amd64 platforms, return 1. */
2917 ULong amd64g_dirtyhelper_RDTSC ( void )
2918 {
2919 #  if defined(__x86_64__)
2920    UInt  eax, edx;
2921    __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
2922    return (((ULong)edx) << 32) | ((ULong)eax);
2923 #  else
2924    return 1ULL;
2925 #  endif
2926 }
2927 
2928 /* CALLED FROM GENERATED CODE */
2929 /* DIRTY HELPER (non-referentially-transparent) */
2930 /* Horrible hack.  On non-amd64 platforms, return 1. */
2931 /* This uses a different calling convention from _RDTSC just above
2932    only because of the difficulty of returning 96 bits from a C
2933    function -- RDTSC returns 64 bits and so is simple by comparison,
2934    on amd64. */
2935 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
2936 {
2937 #  if defined(__x86_64__)
2938    UInt eax, ecx, edx;
2939    __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
2940    st->guest_RAX = (ULong)eax;
2941    st->guest_RCX = (ULong)ecx;
2942    st->guest_RDX = (ULong)edx;
2943 #  else
2944    /* Do nothing. */
2945 #  endif
2946 }
2947 
2948 /* CALLED FROM GENERATED CODE */
2949 /* DIRTY HELPER (non-referentially-transparent) */
2950 /* Horrible hack.  On non-amd64 platforms, return 0. */
2951 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
2952 {
2953 #  if defined(__x86_64__)
2954    ULong r = 0;
2955    portno &= 0xFFFF;
2956    switch (sz) {
2957       case 4:
2958          __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
2959                               : "=a" (r) : "Nd" (portno));
2960          break;
2961       case 2:
2962          __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
2963                               : "=a" (r) : "Nd" (portno));
2964          break;
2965       case 1:
2966          __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
2967                               : "=a" (r) : "Nd" (portno));
2968          break;
2969       default:
2970          break; /* note: no 64-bit version of insn exists */
2971    }
2972    return r;
2973 #  else
2974    return 0;
2975 #  endif
2976 }
2977 
2978 
2979 /* CALLED FROM GENERATED CODE */
2980 /* DIRTY HELPER (non-referentially-transparent) */
2981 /* Horrible hack.  On non-amd64 platforms, do nothing. */
2982 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
2983 {
2984 #  if defined(__x86_64__)
2985    portno &= 0xFFFF;
2986    switch (sz) {
2987       case 4:
2988          __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
2989                               : : "a" (data), "Nd" (portno));
2990          break;
2991       case 2:
2992          __asm__ __volatile__("outw %w0, %w1"
2993                               : : "a" (data), "Nd" (portno));
2994          break;
2995       case 1:
2996          __asm__ __volatile__("outb %b0, %w1"
2997                               : : "a" (data), "Nd" (portno));
2998          break;
2999       default:
3000          break; /* note: no 64-bit version of insn exists */
3001    }
3002 #  else
3003    /* do nothing */
3004 #  endif
3005 }
3006 
3007 /* CALLED FROM GENERATED CODE */
3008 /* DIRTY HELPER (non-referentially-transparent) */
3009 /* Horrible hack.  On non-amd64 platforms, write a zeroed descriptor. */
3010 /* op = 0: call the native SGDT instruction.
3011    op = 1: call the native SIDT instruction.
3012 */
3013 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3014 #  if defined(__x86_64__)
3015    switch (op) {
3016       case 0:
3017          __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3018          break;
3019       case 1:
3020          __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3021          break;
3022       default:
3023          vpanic("amd64g_dirtyhelper_SxDT");
3024    }
3025 #  else
3026    /* Not on amd64: just zero out the 10-byte descriptor area. */
3027    UChar* p = (UChar*)address;
3028    p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3029    p[6] = p[7] = p[8] = p[9] = 0;
3030 #  endif
3031 }
3032 
3033 /*---------------------------------------------------------------*/
3034 /*--- Helpers for MMX/SSE/SSE2.                               ---*/
3035 /*---------------------------------------------------------------*/
3036 
3037 static inline UChar abdU8 ( UChar xx, UChar yy ) {
3038    return toUChar(xx>yy ? xx-yy : yy-xx);
3039 }
3040 
3041 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
3042    return (((ULong)w1) << 32) | ((ULong)w0);
3043 }
3044 
3045 static inline UShort sel16x4_3 ( ULong w64 ) {
3046    UInt hi32 = toUInt(w64 >> 32);
3047    return toUShort(hi32 >> 16);
3048 }
3049 static inline UShort sel16x4_2 ( ULong w64 ) {
3050    UInt hi32 = toUInt(w64 >> 32);
3051    return toUShort(hi32);
3052 }
3053 static inline UShort sel16x4_1 ( ULong w64 ) {
3054    UInt lo32 = toUInt(w64);
3055    return toUShort(lo32 >> 16);
3056 }
3057 static inline UShort sel16x4_0 ( ULong w64 ) {
3058    UInt lo32 = toUInt(w64);
3059    return toUShort(lo32);
3060 }
3061 
3062 static inline UChar sel8x8_7 ( ULong w64 ) {
3063    UInt hi32 = toUInt(w64 >> 32);
3064    return toUChar(hi32 >> 24);
3065 }
3066 static inline UChar sel8x8_6 ( ULong w64 ) {
3067    UInt hi32 = toUInt(w64 >> 32);
3068    return toUChar(hi32 >> 16);
3069 }
3070 static inline UChar sel8x8_5 ( ULong w64 ) {
3071    UInt hi32 = toUInt(w64 >> 32);
3072    return toUChar(hi32 >> 8);
3073 }
3074 static inline UChar sel8x8_4 ( ULong w64 ) {
3075    UInt hi32 = toUInt(w64 >> 32);
3076    return toUChar(hi32 >> 0);
3077 }
3078 static inline UChar sel8x8_3 ( ULong w64 ) {
3079    UInt lo32 = toUInt(w64);
3080    return toUChar(lo32 >> 24);
3081 }
3082 static inline UChar sel8x8_2 ( ULong w64 ) {
3083    UInt lo32 = toUInt(w64);
3084    return toUChar(lo32 >> 16);
3085 }
3086 static inline UChar sel8x8_1 ( ULong w64 ) {
3087    UInt lo32 = toUInt(w64);
3088    return toUChar(lo32 >> 8);
3089 }
3090 static inline UChar sel8x8_0 ( ULong w64 ) {
3091    UInt lo32 = toUInt(w64);
3092    return toUChar(lo32 >> 0);
3093 }
3094 
3095 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3096 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
3097 {
3098    return
3099       mk32x2(
3100          (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
3101             + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
3102          (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
3103             + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
3104       );
3105 }
3106 
3107 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3108 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
3109 {
3110    UInt t = 0;
3111    t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
3112    t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
3113    t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
3114    t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
3115    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3116    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3117    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3118    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3119    t &= 0xFFFF;
3120    return (ULong)t;
3121 }
3122 
3123 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3124 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
3125 {
3126    UShort t, min;
3127    UInt   idx;
3128    t = sel16x4_0(sLo); if (True)    { min = t; idx = 0; }
3129    t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
3130    t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
3131    t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
3132    t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
3133    t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
3134    t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
3135    t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
3136    return ((ULong)(idx << 16)) | ((ULong)min);
3137 }
3138 
3139 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3140 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
3141 {
3142    UInt  i;
3143    ULong crc = (b & 0xFFULL) ^ crcIn;
3144    for (i = 0; i < 8; i++)
3145       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3146    return crc;
3147 }
3148 
3149 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3150 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
3151 {
3152    UInt  i;
3153    ULong crc = (w & 0xFFFFULL) ^ crcIn;
3154    for (i = 0; i < 16; i++)
3155       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3156    return crc;
3157 }
3158 
3159 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3160 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
3161 {
3162    UInt i;
3163    ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
3164    for (i = 0; i < 32; i++)
3165       crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
3166    return crc;
3167 }
3168 
3169 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3170 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
3171 {
3172    ULong crc = amd64g_calc_crc32l(crcIn, q);
3173    return amd64g_calc_crc32l(crc, q >> 32);
3174 }
3175 
3176 
3177 /* .. helper for next fn .. */
3178 static inline ULong sad_8x4 ( ULong xx, ULong yy )
3179 {
3180    UInt t = 0;
3181    t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
3182    t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
3183    t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
3184    t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
3185    return (ULong)t;
3186 }
3187 
3188 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3189 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
3190                             ULong dHi, ULong dLo,
3191                             ULong imm_and_return_control_bit )
3192 {
3193    UInt imm8     = imm_and_return_control_bit & 7;
3194    Bool calcHi   = (imm_and_return_control_bit >> 7) & 1;
3195    UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
3196    UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
3197    /* For src we only need 32 bits, so get them into the
3198       lower half of a 64 bit word. */
3199    ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
3200    /* For dst we need to get hold of 56 bits (7 bytes) from a total of
3201       11 bytes.  If calculating the low part of the result, need bytes
3202       dstOffsL * 4 + (0 .. 6); if calculating the high part,
3203       dstOffsL * 4 + (4 .. 10). */
3204    ULong dst;
3205    /* dstOffsL = 0, Lo  ->  0 .. 6
3206       dstOffsL = 1, Lo  ->  4 .. 10
3207       dstOffsL = 0, Hi  ->  4 .. 10
3208       dstOffsL = 1, Hi  ->  8 .. 14
3209    */
3210    if (calcHi && dstOffsL) {
3211       /* 8 .. 14 */
3212       dst = dHi & 0x00FFFFFFFFFFFFFFULL;
3213    }
3214    else if (!calcHi && !dstOffsL) {
3215       /* 0 .. 6 */
3216       dst = dLo & 0x00FFFFFFFFFFFFFFULL;
3217    }
3218    else {
3219       /* 4 .. 10 */
3220       dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
3221    }
3222    ULong r0  = sad_8x4( dst >>  0, src );
3223    ULong r1  = sad_8x4( dst >>  8, src );
3224    ULong r2  = sad_8x4( dst >> 16, src );
3225    ULong r3  = sad_8x4( dst >> 24, src );
3226    ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
3227    return res;
3228 }
3229 
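/* Editorial sketch (not compiled): the argument convention of
   amd64g_calc_mpsadbw.  A full 128-bit MPSADBW result can be assembled
   from two calls, with bit 7 of the final argument selecting whether the
   low or high 64 bits of the result are computed.  The wrapper name is
   hypothetical. */
#if 0
static void example_mpsadbw ( ULong sHi, ULong sLo, ULong dHi, ULong dLo,
                              UChar imm8,
                              /*OUT*/ULong* resHi, /*OUT*/ULong* resLo )
{
   *resLo = amd64g_calc_mpsadbw(sHi, sLo, dHi, dLo, imm8 & 7);
   *resHi = amd64g_calc_mpsadbw(sHi, sLo, dHi, dLo, (imm8 & 7) | 0x80);
}
#endif
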
3230 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3231 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
3232 {
3233    ULong dst = 0;
3234    ULong src_bit;
3235    ULong dst_bit = 1;
3236    for (src_bit = 1; src_bit; src_bit <<= 1) {
3237       if (mask & src_bit) {
3238          if (src_masked & src_bit) dst |= dst_bit;
3239          dst_bit <<= 1;
3240       }
3241    }
3242    return dst;
3243 }
3244 
3245 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
3246 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
3247 {
3248    ULong dst = 0;
3249    ULong dst_bit;
3250    ULong src_bit = 1;
3251    for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
3252       if (mask & dst_bit) {
3253          if (src & src_bit) dst |= dst_bit;
3254          src_bit <<= 1;
3255       }
3256    }
3257    return dst;
3258 }
3259 
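/* Editorial worked example (not compiled): with mask 0b11010, PDEP
   scatters the low bits of the source into the mask positions, and PEXT
   gathers them back.  Note that amd64g_calculate_pext expects its first
   argument to have been ANDed with the mask already. */
#if 0
static void example_pdep_pext ( void )
{
   vassert(amd64g_calculate_pdep(0x5,  0x1A) == 0x12);  /* 0b101   -> 0b10010 */
   vassert(amd64g_calculate_pext(0x12, 0x1A) == 0x5);   /* 0b10010 -> 0b101   */
}
#endif
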
3260 /*---------------------------------------------------------------*/
3261 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
3262 /*---------------------------------------------------------------*/
3263 
3264 static UInt zmask_from_V128 ( V128* arg )
3265 {
3266    UInt i, res = 0;
3267    for (i = 0; i < 16; i++) {
3268       res |=  ((arg->w8[i] == 0) ? 1 : 0) << i;
3269    }
3270    return res;
3271 }
3272 
3273 static UInt zmask_from_V128_wide ( V128* arg )
3274 {
3275    UInt i, res = 0;
3276    for (i = 0; i < 8; i++) {
3277       res |=  ((arg->w16[i] == 0) ? 1 : 0) << i;
3278    }
3279    return res;
3280 }
3281 
3282 /* Helps with PCMP{I,E}STR{I,M}.
3283 
3284    CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (But not really,
3285    actually it could be a clean helper, but for the fact that we can't
3286    pass by value 2 x V128 to a clean helper, nor have one returned.)
3287    Reads guest state, writes to guest state for the xSTRM cases, makes
3288    no other memory accesses, and is otherwise a pure function.
3289 
3290    opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
3291    the callee knows which I/E and I/M variant it is dealing with and
3292    what the specific operation is.  4th byte of opcode is in the range
3293    0x60 to 0x63:
3294        istri  66 0F 3A 63
3295        istrm  66 0F 3A 62
3296        estri  66 0F 3A 61
3297        estrm  66 0F 3A 60
3298 
3299    gstOffL and gstOffR are the guest state offsets for the two XMM
3300    register inputs.  We never have to deal with the memory case since
3301    that is handled by pre-loading the relevant value into the fake
3302    XMM16 register.
3303 
3304    For ESTRx variants, edxIN and eaxIN hold the values of those two
3305    registers.
3306 
3307    In all cases, the bottom 16 bits of the result contain the new
3308    OSZACP %rflags values.  For xSTRI variants, bits[31:16] of the
3309    result hold the new %ecx value.  For xSTRM variants, the helper
3310    writes the result directly to the guest XMM0.
3311 
3312    Declarable side effects: in all cases, reads guest state at
3313    [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
3314    guest_XMM0.
3315 
3316    Is expected to be called with opc4_and_imm combinations which have
3317    actually been validated, and will assert if otherwise.  The front
3318    end should ensure we're only called with verified values.
3319 */
3320 ULong amd64g_dirtyhelper_PCMPxSTRx (
3321           VexGuestAMD64State* gst,
3322           HWord opc4_and_imm,
3323           HWord gstOffL, HWord gstOffR,
3324           HWord edxIN, HWord eaxIN
3325        )
3326 {
3327    HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
3328    HWord imm8 = opc4_and_imm & 0xFF;
3329    HWord isISTRx = opc4 & 2;
3330    HWord isxSTRM = (opc4 & 1) ^ 1;
3331    vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
3332    HWord wide = (imm8 & 1);
3333 
3334    // where the args are
3335    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3336    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3337 
3338    /* Create the arg validity masks, either from the vectors
3339       themselves or from the supplied edx/eax values. */
3340    // Note: the wide (16-bit lane) and narrow (8-bit lane) cases are
3341    // handled separately below.
3342    UInt zmaskL, zmaskR;
3343 
3344    // temp spot for the resulting flags and vector.
3345    V128 resV;
3346    UInt resOSZACP;
3347 
3348    // for checking whether case was handled
3349    Bool ok = False;
3350 
3351    if (wide) {
3352       if (isISTRx) {
3353          zmaskL = zmask_from_V128_wide(argL);
3354          zmaskR = zmask_from_V128_wide(argR);
3355       } else {
3356          Int tmp;
3357          tmp = edxIN & 0xFFFFFFFF;
3358          if (tmp < -8) tmp = -8;
3359          if (tmp > 8)  tmp = 8;
3360          if (tmp < 0)  tmp = -tmp;
3361          vassert(tmp >= 0 && tmp <= 8);
3362          zmaskL = (1 << tmp) & 0xFF;
3363          tmp = eaxIN & 0xFFFFFFFF;
3364          if (tmp < -8) tmp = -8;
3365          if (tmp > 8)  tmp = 8;
3366          if (tmp < 0)  tmp = -tmp;
3367          vassert(tmp >= 0 && tmp <= 8);
3368          zmaskR = (1 << tmp) & 0xFF;
3369       }
3370       // do the math
3371       ok = compute_PCMPxSTRx_wide (
3372               &resV, &resOSZACP, argL, argR,
3373               zmaskL, zmaskR, imm8, (Bool)isxSTRM
3374            );
3375    } else {
3376       if (isISTRx) {
3377          zmaskL = zmask_from_V128(argL);
3378          zmaskR = zmask_from_V128(argR);
3379       } else {
3380          Int tmp;
3381          tmp = edxIN & 0xFFFFFFFF;
3382          if (tmp < -16) tmp = -16;
3383          if (tmp > 16)  tmp = 16;
3384          if (tmp < 0)   tmp = -tmp;
3385          vassert(tmp >= 0 && tmp <= 16);
3386          zmaskL = (1 << tmp) & 0xFFFF;
3387          tmp = eaxIN & 0xFFFFFFFF;
3388          if (tmp < -16) tmp = -16;
3389          if (tmp > 16)  tmp = 16;
3390          if (tmp < 0)   tmp = -tmp;
3391          vassert(tmp >= 0 && tmp <= 16);
3392          zmaskR = (1 << tmp) & 0xFFFF;
3393       }
3394       // do the math
3395       ok = compute_PCMPxSTRx (
3396               &resV, &resOSZACP, argL, argR,
3397               zmaskL, zmaskR, imm8, (Bool)isxSTRM
3398            );
3399    }
3400 
3401    // front end shouldn't pass us any imm8 variants we can't
3402    // handle.  Hence:
3403    vassert(ok);
3404 
3405    // So, finally we need to get the results back to the caller.
3406    // In all cases, the new OSZACP value is the lowest 16 of
3407    // the return value.
3408    if (isxSTRM) {
3409       gst->guest_YMM0[0] = resV.w32[0];
3410       gst->guest_YMM0[1] = resV.w32[1];
3411       gst->guest_YMM0[2] = resV.w32[2];
3412       gst->guest_YMM0[3] = resV.w32[3];
3413       return resOSZACP & 0x8D5;
3414    } else {
3415       UInt newECX = resV.w32[0] & 0xFFFF;
3416       return (newECX << 16) | (resOSZACP & 0x8D5);
3417    }
3418 }
3419 
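/* Editorial sketch (not compiled): the packing/unpacking conventions of
   amd64g_dirtyhelper_PCMPxSTRx, restating the doc comment above for the
   PCMPISTRI case (4th opcode byte 0x63).  The offsets and imm8 value are
   purely illustrative. */
#if 0
static void example_pcmpistri ( VexGuestAMD64State* gst,
                                HWord offXMML, HWord offXMMR, UChar imm8 )
{
   HWord opc4_and_imm = (0x63 << 8) | imm8;
   ULong r = amd64g_dirtyhelper_PCMPxSTRx(gst, opc4_and_imm,
                                          offXMML, offXMMR,
                                          0/*edx: unused for ISTRx*/,
                                          0/*eax: unused for ISTRx*/);
   UInt newECX    = (UInt)(r >> 16) & 0xFFFF;  /* xSTRI: result index */
   UInt newOSZACP = (UInt)r & 0xFFFF;          /* new OSZACP flag bits */
   (void)newECX; (void)newOSZACP;
}
#endif
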
3420 /*---------------------------------------------------------------*/
3421 /*--- AES primitives and helpers                              ---*/
3422 /*---------------------------------------------------------------*/
3423 /* a 16 x 16 matrix */
3424 static const UChar sbox[256] = {                   // row nr
3425    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
3426    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
3427    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
3428    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
3429    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
3430    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
3431    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
3432    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
3433    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
3434    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
3435    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
3436    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
3437    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
3438    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
3439    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
3440    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
3441    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
3442    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
3443    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
3444    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
3445    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
3446    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
3447    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
3448    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
3449    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
3450    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
3451    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
3452    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
3453    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
3454    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
3455    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
3456    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
3457 };
3458 static void SubBytes (V128* v)
3459 {
3460    V128 r;
3461    UInt i;
3462    for (i = 0; i < 16; i++)
3463       r.w8[i] = sbox[v->w8[i]];
3464    *v = r;
3465 }
3466 
3467 /* a 16 x 16 matrix */
3468 static const UChar invsbox[256] = {                // row nr
3469    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
3470    0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
3471    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
3472    0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
3473    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
3474    0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
3475    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
3476    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
3477    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
3478    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
3479    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
3480    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
3481    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
3482    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
3483    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
3484    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
3485    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
3486    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
3487    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
3488    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
3489    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
3490    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
3491    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
3492    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
3493    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
3494    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
3495    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
3496    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
3497    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
3498    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
3499    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
3500    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
3501 };
3502 static void InvSubBytes (V128* v)
3503 {
3504    V128 r;
3505    UInt i;
3506    for (i = 0; i < 16; i++)
3507       r.w8[i] = invsbox[v->w8[i]];
3508    *v = r;
3509 }
3510 
3511 static const UChar ShiftRows_op[16] =
3512    {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
3513 static void ShiftRows (V128* v)
3514 {
3515    V128 r;
3516    UInt i;
3517    for (i = 0; i < 16; i++)
3518       r.w8[i] = v->w8[ShiftRows_op[15-i]];
3519    *v = r;
3520 }
3521 
3522 static const UChar InvShiftRows_op[16] =
3523    {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
3524 static void InvShiftRows (V128* v)
3525 {
3526    V128 r;
3527    UInt i;
3528    for (i = 0; i < 16; i++)
3529       r.w8[i] = v->w8[InvShiftRows_op[15-i]];
3530    *v = r;
3531 }
3532 
3533 /* Multiplication of the finite fields elements of AES.
3534    See "A Specification for The AES Algorithm Rijndael
3535         (by Joan Daemen & Vincent Rijmen)"
3536         Dr. Brian Gladman, v3.1, 3rd March 2001. */
3537 /* N values so that (hex) xy = 0x03^N.
3538    0x00 cannot be used. We put 0xff for this value.*/
3539 /* a 16 x 16 matrix */
3540 static const UChar Nxy[256] = {                    // row nr
3541    0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
3542    0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
3543    0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
3544    0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
3545    0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
3546    0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
3547    0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
3548    0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
3549    0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
3550    0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
3551    0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
3552    0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
3553    0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
3554    0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
3555    0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
3556    0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
3557    0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
3558    0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
3559    0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
3560    0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
3561    0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
3562    0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
3563    0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
3564    0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
3565    0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
3566    0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
3567    0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
3568    0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
3569    0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
3570    0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
3571    0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
3572    0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
3573 };
3574 
3575 /* E values so that E = 0x03^xy. */
3576 static const UChar Exy[256] = {                    // row nr
3577    0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
3578    0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
3579    0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
3580    0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
3581    0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
3582    0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
3583    0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
3584    0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
3585    0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
3586    0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
3587    0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
3588    0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
3589    0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
3590    0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
3591    0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
3592    0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
3593    0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
3594    0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
3595    0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
3596    0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
3597    0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
3598    0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
3599    0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
3600    0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
3601    0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
3602    0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
3603    0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
3604    0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
3605    0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
3606    0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
3607    0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
3608    0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
3609 
3610 static inline UChar ff_mul(UChar u1, UChar u2)
3611 {
3612    if ((u1 > 0) && (u2 > 0)) {
3613       UInt ui = Nxy[u1] + Nxy[u2];
3614       if (ui >= 255)
3615          ui = ui - 255;
3616       return Exy[ui];
3617    } else {
3618       return 0;
3619    };
3620 }
3621 
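/* Editorial sketch (not compiled): the same GF(2^8) multiplication as
   ff_mul above, computed by shift-and-xor reduction against the AES
   field polynomial x^8 + x^4 + x^3 + x + 1 (0x11B) instead of via the
   Nxy/Exy log/antilog tables. */
#if 0
static UChar ref_ff_mul ( UChar u1, UChar u2 )
{
   UInt a = u1, b = u2, acc = 0;
   while (b != 0) {
      if (b & 1)
         acc ^= a;
      a <<= 1;
      if (a & 0x100)
         a ^= 0x11B;   /* reduce modulo the AES field polynomial */
      b >>= 1;
   }
   return toUChar(acc);
}
#endif
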
3622 static void MixColumns (V128* v)
3623 {
3624    V128 r;
3625    Int j;
3626 #define P(x,row,col) (x)->w8[((row)*4+(col))]
3627    for (j = 0; j < 4; j++) {
3628       P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
3629          ^ P(v,j,2) ^ P(v,j,3);
3630       P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
3631          ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
3632       P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
3633          ^ ff_mul(0x03, P(v,j,3) );
3634       P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
3635          ^ ff_mul( 0x02, P(v,j,3) );
3636    }
3637    *v = r;
3638 #undef P
3639 }
3640 
3641 static void InvMixColumns (V128* v)
3642 {
3643    V128 r;
3644    Int j;
3645 #define P(x,row,col) (x)->w8[((row)*4+(col))]
3646    for (j = 0; j < 4; j++) {
3647       P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
3648          ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
3649       P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
3650          ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
3651       P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
3652          ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
3653       P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
3654          ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
3655    }
3656    *v = r;
3657 #undef P
3658 
3659 }
3660 
3661 /* For description, see definition in guest_amd64_defs.h */
3662 void amd64g_dirtyhelper_AES (
3663           VexGuestAMD64State* gst,
3664           HWord opc4, HWord gstOffD,
3665           HWord gstOffL, HWord gstOffR
3666        )
3667 {
3668    // where the args are
3669    V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
3670    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3671    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3672    V128  r;
3673 
3674    switch (opc4) {
3675       case 0xDC: /* AESENC */
3676       case 0xDD: /* AESENCLAST */
3677          r = *argR;
3678          ShiftRows (&r);
3679          SubBytes  (&r);
3680          if (opc4 == 0xDC)
3681             MixColumns (&r);
3682          argD->w64[0] = r.w64[0] ^ argL->w64[0];
3683          argD->w64[1] = r.w64[1] ^ argL->w64[1];
3684          break;
3685 
3686       case 0xDE: /* AESDEC */
3687       case 0xDF: /* AESDECLAST */
3688          r = *argR;
3689          InvShiftRows (&r);
3690          InvSubBytes (&r);
3691          if (opc4 == 0xDE)
3692             InvMixColumns (&r);
3693          argD->w64[0] = r.w64[0] ^ argL->w64[0];
3694          argD->w64[1] = r.w64[1] ^ argL->w64[1];
3695          break;
3696 
3697       case 0xDB: /* AESIMC */
3698          *argD = *argL;
3699          InvMixColumns (argD);
3700          break;
3701       default: vassert(0);
3702    }
3703 }
3704 
3705 static inline UInt RotWord (UInt   w32)
3706 {
3707    return ((w32 >> 8) | (w32 << 24));
3708 }
3709 
3710 static inline UInt SubWord (UInt   w32)
3711 {
3712    UChar *w8;
3713    UChar *r8;
3714    UInt res;
3715    w8 = (UChar*) &w32;
3716    r8 = (UChar*) &res;
3717    r8[0] = sbox[w8[0]];
3718    r8[1] = sbox[w8[1]];
3719    r8[2] = sbox[w8[2]];
3720    r8[3] = sbox[w8[3]];
3721    return res;
3722 }
3723 
3724 /* For description, see definition in guest_amd64_defs.h */
3725 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
3726           VexGuestAMD64State* gst,
3727           HWord imm8,
3728           HWord gstOffL, HWord gstOffR
3729        )
3730 {
3731    // where the args are
3732    V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
3733    V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
3734 
3735    argR->w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
3736    argR->w32[2] = SubWord (argL->w32[3]);
3737    argR->w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
3738    argR->w32[0] = SubWord (argL->w32[1]);
3739 }
3740 
3741 
3742 
3743 /*---------------------------------------------------------------*/
3744 /*--- Helpers for dealing with, and describing,               ---*/
3745 /*--- guest state as a whole.                                 ---*/
3746 /*---------------------------------------------------------------*/
3747 
3748 /* Initialise the entire amd64 guest state. */
3749 /* VISIBLE TO LIBVEX CLIENT */
3750 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
3751 {
3752    vex_state->host_EvC_FAILADDR = 0;
3753    vex_state->host_EvC_COUNTER = 0;
3754    vex_state->pad0 = 0;
3755 
3756    vex_state->guest_RAX = 0;
3757    vex_state->guest_RCX = 0;
3758    vex_state->guest_RDX = 0;
3759    vex_state->guest_RBX = 0;
3760    vex_state->guest_RSP = 0;
3761    vex_state->guest_RBP = 0;
3762    vex_state->guest_RSI = 0;
3763    vex_state->guest_RDI = 0;
3764    vex_state->guest_R8  = 0;
3765    vex_state->guest_R9  = 0;
3766    vex_state->guest_R10 = 0;
3767    vex_state->guest_R11 = 0;
3768    vex_state->guest_R12 = 0;
3769    vex_state->guest_R13 = 0;
3770    vex_state->guest_R14 = 0;
3771    vex_state->guest_R15 = 0;
3772 
3773    vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
3774    vex_state->guest_CC_DEP1 = 0;
3775    vex_state->guest_CC_DEP2 = 0;
3776    vex_state->guest_CC_NDEP = 0;
3777 
3778    vex_state->guest_DFLAG   = 1; /* forwards */
3779    vex_state->guest_IDFLAG  = 0;
3780    vex_state->guest_ACFLAG  = 0;
3781 
3782    /* HACK: represent the offset associated with %fs==0. This
3783       assumes that %fs is only ever zero. */
3784    vex_state->guest_FS_ZERO = 0;
3785 
3786    vex_state->guest_RIP = 0;
3787 
3788    /* Initialise the simulated FPU */
3789    amd64g_dirtyhelper_FINIT( vex_state );
3790 
3791    /* Initialise the AVX state. */
3792 #  define AVXZERO(_ymm) \
3793       do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
3794            _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
3795       } while (0)
3796    vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
3797    AVXZERO(vex_state->guest_YMM0);
3798    AVXZERO(vex_state->guest_YMM1);
3799    AVXZERO(vex_state->guest_YMM2);
3800    AVXZERO(vex_state->guest_YMM3);
3801    AVXZERO(vex_state->guest_YMM4);
3802    AVXZERO(vex_state->guest_YMM5);
3803    AVXZERO(vex_state->guest_YMM6);
3804    AVXZERO(vex_state->guest_YMM7);
3805    AVXZERO(vex_state->guest_YMM8);
3806    AVXZERO(vex_state->guest_YMM9);
3807    AVXZERO(vex_state->guest_YMM10);
3808    AVXZERO(vex_state->guest_YMM11);
3809    AVXZERO(vex_state->guest_YMM12);
3810    AVXZERO(vex_state->guest_YMM13);
3811    AVXZERO(vex_state->guest_YMM14);
3812    AVXZERO(vex_state->guest_YMM15);
3813    AVXZERO(vex_state->guest_YMM16);
3814 
3815 #  undef AVXZERO
3816 
3817    vex_state->guest_EMNOTE = EmNote_NONE;
3818 
3819    /* These should not ever be either read or written, but we
3820       initialise them anyway. */
3821    vex_state->guest_CMSTART = 0;
3822    vex_state->guest_CMLEN   = 0;
3823 
3824    vex_state->guest_NRADDR   = 0;
3825    vex_state->guest_SC_CLASS = 0;
3826    vex_state->guest_GS_0x60  = 0;
3827 
3828    vex_state->guest_IP_AT_SYSCALL = 0;
3829    vex_state->pad1 = 0;
3830 }
3831 
3832 
3833 /* Figure out if any part of the guest state contained in minoff
3834    .. maxoff requires precise memory exceptions.  If in doubt return
3835    True (but this generates significantly slower code).
3836 
3837    By default we enforce precise exns for guest %RSP, %RBP and %RIP
3838    only.  These are the minimum needed to extract correct stack
3839    backtraces from amd64 code.
3840 
3841    Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
3842 */
3843 Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
3844                                                    Int maxoff)
3845 {
3846    Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
3847    Int rbp_max = rbp_min + 8 - 1;
3848    Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
3849    Int rsp_max = rsp_min + 8 - 1;
3850    Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
3851    Int rip_max = rip_min + 8 - 1;
3852 
3853    if (maxoff < rsp_min || minoff > rsp_max) {
3854       /* no overlap with rsp */
3855       if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
3856          return False; // We only need to check stack pointer.
3857    } else {
3858       return True;
3859    }
3860 
3861    if (maxoff < rbp_min || minoff > rbp_max) {
3862       /* no overlap with rbp */
3863    } else {
3864       return True;
3865    }
3866 
3867    if (maxoff < rip_min || minoff > rip_max) {
3868       /* no overlap with rip */
3869    } else {
3870       return True;
3871    }
3872 
3873    return False;
3874 }
3875 
3876 
3877 #define ALWAYSDEFD(field)                             \
3878     { offsetof(VexGuestAMD64State, field),            \
3879       (sizeof ((VexGuestAMD64State*)0)->field) }
3880 
3881 VexGuestLayout
3882    amd64guest_layout
3883       = {
3884           /* Total size of the guest state, in bytes. */
3885           .total_sizeB = sizeof(VexGuestAMD64State),
3886 
3887           /* Describe the stack pointer. */
3888           .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
3889           .sizeof_SP = 8,
3890 
3891           /* Describe the frame pointer. */
3892           .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
3893           .sizeof_FP = 8,
3894 
3895           /* Describe the instruction pointer. */
3896           .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
3897           .sizeof_IP = 8,
3898 
3899           /* Describe any sections to be regarded by Memcheck as
3900              'always-defined'. */
3901           .n_alwaysDefd = 16,
3902 
3903           /* flags thunk: OP and NDEP are always defd, whereas DEP1
3904              and DEP2 have to be tracked.  See detailed comment in
3905              gdefs.h on meaning of thunk fields. */
3906           .alwaysDefd
3907              = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
3908                  /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
3909                  /*  2 */ ALWAYSDEFD(guest_DFLAG),
3910                  /*  3 */ ALWAYSDEFD(guest_IDFLAG),
3911                  /*  4 */ ALWAYSDEFD(guest_RIP),
3912                  /*  5 */ ALWAYSDEFD(guest_FS_ZERO),
3913                  /*  6 */ ALWAYSDEFD(guest_FTOP),
3914                  /*  7 */ ALWAYSDEFD(guest_FPTAG),
3915                  /*  8 */ ALWAYSDEFD(guest_FPROUND),
3916                  /*  9 */ ALWAYSDEFD(guest_FC3210),
3917                  // /* */ ALWAYSDEFD(guest_CS),
3918                  // /* */ ALWAYSDEFD(guest_DS),
3919                  // /* */ ALWAYSDEFD(guest_ES),
3920                  // /* */ ALWAYSDEFD(guest_FS),
3921                  // /* */ ALWAYSDEFD(guest_GS),
3922                  // /* */ ALWAYSDEFD(guest_SS),
3923                  // /* */ ALWAYSDEFD(guest_LDT),
3924                  // /* */ ALWAYSDEFD(guest_GDT),
3925                  /* 10 */ ALWAYSDEFD(guest_EMNOTE),
3926                  /* 11 */ ALWAYSDEFD(guest_SSEROUND),
3927                  /* 12 */ ALWAYSDEFD(guest_CMSTART),
3928                  /* 13 */ ALWAYSDEFD(guest_CMLEN),
3929                  /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
3930                  /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
3931                }
3932         };
3933 
3934 
3935 /*---------------------------------------------------------------*/
3936 /*--- end                               guest_amd64_helpers.c ---*/
3937 /*---------------------------------------------------------------*/
3938