1 
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations.         ---*/
4 /*---                                               mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of MemCheck, a heavyweight Valgrind tool for
9    detecting memory errors.
10 
11    Copyright (C) 2000-2011 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_tool_basics.h"
33 #include "pub_tool_hashtable.h"     // For mc_include.h
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_tooliface.h"
37 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
38 #include "pub_tool_xarray.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_libcbase.h"
41 
42 #include "mc_include.h"
43 
44 
45 /* FIXMEs JRS 2011-June-16.
46 
47    Check the interpretation for vector narrowing and widening ops,
48    particularly the saturating ones.  I suspect they are overly
49    pessimistic and/or wrong.
50 */
51 
52 /* This file implements the Memcheck instrumentation, and in
53    particular contains the core of its undefined value detection
54    machinery.  For a comprehensive background of the terminology,
55    algorithms and rationale used herein, read:
56 
57      Using Valgrind to detect undefined value errors with
58      bit-precision
59 
60      Julian Seward and Nicholas Nethercote
61 
62      2005 USENIX Annual Technical Conference (General Track),
63      Anaheim, CA, USA, April 10-15, 2005.
64 
65    ----
66 
67    Here is as good a place as any to record exactly when V bits are and
68    should be checked, why, and what function is responsible.
69 
70 
71    Memcheck complains when an undefined value is used:
72 
73    1. In the condition of a conditional branch.  Because it could cause
74       incorrect control flow, and thus cause incorrect externally-visible
75       behaviour.  [mc_translate.c:complainIfUndefined]
76 
77    2. As an argument to a system call, or as the value that specifies
78       the system call number.  Because it could cause an incorrect
79       externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
80 
81    3. As the address in a load or store.  Because it could cause an
82       incorrect value to be used later, which could cause externally-visible
83       behaviour (eg. via incorrect control flow or an incorrect system call
84       argument)  [complainIfUndefined]
85 
86    4. As the target address of a branch.  Because it could cause incorrect
87       control flow.  [complainIfUndefined]
88 
89    5. As an argument to setenv, unsetenv, or putenv.  Because it could put
90       an incorrect value into the external environment.
91       [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
92 
93    6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
94       [complainIfUndefined]
95 
96    7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
97       VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
98       requested it.  [in memcheck.h]
99 
100 
101    Memcheck also complains, but should not, when an undefined value is used:
102 
103    8. As the shift value in certain SIMD shift operations (but not in the
104       standard integer shift operations).  This inconsistency is due to
105       historical reasons.  [complainIfUndefined]
106 
107 
108    Memcheck does not complain, but should, when an undefined value is used:
109 
110    9. As an input to a client request.  Because the client request may
111       affect the visible behaviour -- see bug #144362 for an example
112       involving the malloc replacements in vg_replace_malloc.c and
113       VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
114       isn't identified.  That bug report also has some info on how to solve
115       the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
116 
117 
118    In practice, 1 and 2 account for the vast majority of cases.
119 */
120 
121 /*------------------------------------------------------------*/
122 /*--- Forward decls                                        ---*/
123 /*------------------------------------------------------------*/
124 
125 struct _MCEnv;
126 
127 static IRType  shadowTypeV ( IRType ty );
128 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
129 static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
130 
131 static IRExpr *i128_const_zero(void);
132 
133 /*------------------------------------------------------------*/
134 /*--- Memcheck running state, and tmp management.          ---*/
135 /*------------------------------------------------------------*/
136 
137 /* Carries info about a particular tmp.  The tmp's number is not
138    recorded, as this is implied by (equal to) its index in the tmpMap
139    in MCEnv.  The tmp's type is also not recorded, as this is present
140    in MCEnv.sb->tyenv.
141 
142    When .kind is Orig, .shadowV and .shadowB may give the identities
143    of the temps currently holding the associated definedness (shadowV)
144    and origin (shadowB) values, or these may be IRTemp_INVALID if code
145    to compute such values has not yet been emitted.
146 
147    When .kind is VSh or BSh then the tmp holds a V- or B- value,
148    and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
149    illogical for a shadow tmp itself to be shadowed.
150 */
151 typedef
152    enum { Orig=1, VSh=2, BSh=3 }
153    TempKind;
154 
155 typedef
156    struct {
157       TempKind kind;
158       IRTemp   shadowV;
159       IRTemp   shadowB;
160    }
161    TempMapEnt;
162 
163 
164 /* Carries around state during memcheck instrumentation. */
165 typedef
166    struct _MCEnv {
167       /* MODIFIED: the superblock being constructed.  IRStmts are
168          added. */
169       IRSB* sb;
170       Bool  trace;
171 
172       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
173          current kind and possibly shadow temps for each temp in the
174          IRSB being constructed.  Note that it does not contain the
175          type of each tmp.  If you want to know the type, look at the
176          relevant entry in sb->tyenv.  It follows that at all times
177          during the instrumentation process, the valid indices for
178          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
179          the total number of Orig, V- and B- temps allocated so far.
180 
181          The reason for this strange split (types in one place, all
182          other info in another) is that we need the types to be
183          attached to sb so as to make it possible to do
184          "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
185          instrumentation process. */
186       XArray* /* of TempMapEnt */ tmpMap;
187 
188       /* MODIFIED: indicates whether "bogus" literals have so far been
189          found.  Starts off False, and may change to True. */
190       Bool    bogusLiterals;
191 
192       /* READONLY: the guest layout.  This indicates which parts of
193          the guest state should be regarded as 'always defined'. */
194       VexGuestLayout* layout;
195 
196       /* READONLY: the host word type.  Needed for constructing
197          arguments of type 'HWord' to be passed to helper functions.
198          Ity_I32 or Ity_I64 only. */
199       IRType hWordTy;
200    }
201    MCEnv;
202 
203 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
204    demand), as they are encountered.  This is for two reasons.
205 
206    (1) (less important reason): Many original tmps are unused due to
207    initial IR optimisation, and we do not want to waste space in
208    tables tracking them.
209 
210    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
211    table indexed [0 .. n_temps-1], which gives the current shadow for
212    each original tmp, or IRTemp_INVALID if none is so far assigned.
213    It is necessary to support making multiple assignments to a shadow
214    -- specifically, after testing a shadow for definedness, it needs
215    to be made defined.  But IR's SSA property disallows this.
216 
217    (2) (more important reason): Therefore, when a shadow needs to get
218    a new value, a new temporary is created, the value is assigned to
219    that, and the tmpMap is updated to reflect the new binding.
220 
221    A corollary is that if the tmpMap maps a given tmp to
222    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
223    there's a read-before-write error in the original tmps.  The IR
224    sanity checker should catch all such anomalies, however.
225 */
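/* Illustrative walk-through (a sketch; the temp numbers are made up):
   suppose original tmp t3 starts with tmpMap[3] = { Orig, INVALID,
   INVALID }.  The first findShadowTmpV(mce, 3) allocates a fresh VSh
   temp, say t17, and records tmpMap[3].shadowV = t17.  If t3's shadow
   later needs a new value (e.g. after complainIfUndefined), a call to
   newShadowTmpV(mce, 3) abandons t17 and rebinds tmpMap[3].shadowV to
   another fresh temp, so each shadow temp is still assigned only once
   and the IR stays in SSA form. */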
226 
227 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
228    both the table in mce->sb and to our auxiliary mapping.  Note that
229    newTemp may cause mce->tmpMap to resize, hence previous results
230    from VG_(indexXA)(mce->tmpMap) are invalidated. */
231 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
232 {
233    Word       newIx;
234    TempMapEnt ent;
235    IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
236    ent.kind    = kind;
237    ent.shadowV = IRTemp_INVALID;
238    ent.shadowB = IRTemp_INVALID;
239    newIx = VG_(addToXA)( mce->tmpMap, &ent );
240    tl_assert(newIx == (Word)tmp);
241    return tmp;
242 }
243 
244 
245 /* Find the tmp currently shadowing the given original tmp.  If none
246    so far exists, allocate one.  */
247 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
248 {
249    TempMapEnt* ent;
250    /* VG_(indexXA) range-checks 'orig', hence no need to check
251       here. */
252    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
253    tl_assert(ent->kind == Orig);
254    if (ent->shadowV == IRTemp_INVALID) {
255       IRTemp tmpV
256         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
257       /* newTemp may cause mce->tmpMap to resize, hence previous results
258          from VG_(indexXA) are invalid. */
259       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260       tl_assert(ent->kind == Orig);
261       tl_assert(ent->shadowV == IRTemp_INVALID);
262       ent->shadowV = tmpV;
263    }
264    return ent->shadowV;
265 }
266 
267 /* Allocate a new shadow for the given original tmp.  This means any
268    previous shadow is abandoned.  This is needed because it is
269    necessary to give a new value to a shadow once it has been tested
270    for undefinedness, but unfortunately IR's SSA property disallows
271    this.  Instead we must abandon the old shadow, allocate a new one
272    and use that instead.
273 
274    This is the same as findShadowTmpV, except we don't bother to see
275    if a shadow temp already existed -- we simply allocate a new one
276    regardless. */
277 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
278 {
279    TempMapEnt* ent;
280    /* VG_(indexXA) range-checks 'orig', hence no need to check
281       here. */
282    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
283    tl_assert(ent->kind == Orig);
284    if (1) {
285       IRTemp tmpV
286         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
287       /* newTemp may cause mce->tmpMap to resize, hence previous results
288          from VG_(indexXA) are invalid. */
289       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290       tl_assert(ent->kind == Orig);
291       ent->shadowV = tmpV;
292    }
293 }
294 
295 
296 /*------------------------------------------------------------*/
297 /*--- IRAtoms -- a subset of IRExprs                       ---*/
298 /*------------------------------------------------------------*/
299 
300 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
301    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
302    input, most of this code deals in atoms.  Usefully, a value atom
303    always has a V-value which is also an atom: constants are shadowed
304    by constants, and temps are shadowed by the corresponding shadow
305    temporary. */
306 
307 typedef  IRExpr  IRAtom;
308 
309 /* (used for sanity checks only): is this an atom which looks
310    like it's from original code? */
311 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
312 {
313    if (a1->tag == Iex_Const)
314       return True;
315    if (a1->tag == Iex_RdTmp) {
316       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
317       return ent->kind == Orig;
318    }
319    return False;
320 }
321 
322 /* (used for sanity checks only): is this an atom which looks
323    like it's from shadow code? */
324 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
325 {
326    if (a1->tag == Iex_Const)
327       return True;
328    if (a1->tag == Iex_RdTmp) {
329       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
330       return ent->kind == VSh || ent->kind == BSh;
331    }
332    return False;
333 }
334 
335 /* (used for sanity checks only): check that both args are atoms and
336    are identically-kinded. */
337 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
338 {
339    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
340       return True;
341    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
342       return True;
343    return False;
344 }
345 
346 
347 /*------------------------------------------------------------*/
348 /*--- Type management                                      ---*/
349 /*------------------------------------------------------------*/
350 
351 /* Shadow state is always accessed using integer types.  This returns
352    an integer type with the same size (as per sizeofIRType) as the
353    given type.  The only valid shadow types are Bit, I8, I16, I32,
354    I64, I128, V128. */
355 
356 static IRType shadowTypeV ( IRType ty )
357 {
358    switch (ty) {
359       case Ity_I1:
360       case Ity_I8:
361       case Ity_I16:
362       case Ity_I32:
363       case Ity_I64:
364       case Ity_I128: return ty;
365       case Ity_F32:  return Ity_I32;
366       case Ity_F64:  return Ity_I64;
367       case Ity_F128: return Ity_I128;
368       case Ity_V128: return Ity_V128;
369       default: ppIRType(ty);
370                VG_(tool_panic)("memcheck:shadowTypeV");
371    }
372 }
373 
374 /* Produce a 'defined' value of the given shadow type.  Should only be
375    supplied shadow types (I1/I8/I16/I32/I64/I128/V128). */
376 static IRExpr* definedOfType ( IRType ty ) {
377    switch (ty) {
378       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
379       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
380       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
381       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
382       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
383       case Ity_I128: return i128_const_zero();
384       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
385       default:       VG_(tool_panic)("memcheck:definedOfType");
386    }
387 }
388 
389 
390 /*------------------------------------------------------------*/
391 /*--- Constructing IR fragments                            ---*/
392 /*------------------------------------------------------------*/
393 
394 /* add stmt to a bb */
395 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
396    if (mce->trace) {
397       VG_(printf)("  %c: ", cat);
398       ppIRStmt(st);
399       VG_(printf)("\n");
400    }
401    addStmtToIRSB(mce->sb, st);
402 }
403 
404 /* assign value to tmp */
405 static inline
406 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
407    stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
408 }
409 
410 /* build various kinds of expressions */
411 #define triop(_op, _arg1, _arg2, _arg3) \
412                                  IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
413 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
414 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
415 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
416 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
417 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
418 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
419 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
420 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
421 
422 /* Bind the given expression to a new temporary, and return the
423    temporary.  This effectively converts an arbitrary expression into
424    an atom.
425 
426    'ty' is the type of 'e' and hence the type that the new temporary
427    needs to be.  But passing it in is redundant, since we can deduce
428    the type merely by inspecting 'e'.  So at least use that fact to
429    assert that the two types agree. */
430 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
431 {
432    TempKind k;
433    IRTemp   t;
434    IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
435    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
436    switch (cat) {
437       case 'V': k = VSh;  break;
438       case 'B': k = BSh;  break;
439       case 'C': k = Orig; break;
440                 /* happens when we are making up new "orig"
441                    expressions, for IRCAS handling */
442       default: tl_assert(0);
443    }
444    t = newTemp(mce, ty, k);
445    assign(cat, mce, t, e);
446    return mkexpr(t);
447 }
448 
449 
450 /*------------------------------------------------------------*/
451 /*--- Helper functions for 128-bit ops                     ---*/
452 /*------------------------------------------------------------*/
453 static IRExpr *i128_const_zero(void)
454 {
455   return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
456                IRExpr_Const(IRConst_U64(0)));
457 }
458 
459 /* There are no 128-bit loads or stores, so we do not need to worry
460    about them in expr2vbits_Load. */
461 
462 /*------------------------------------------------------------*/
463 /*--- Constructing definedness primitive ops               ---*/
464 /*------------------------------------------------------------*/
465 
466 /* --------- Defined-if-either-defined --------- */
467 
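/* In the V-bit encoding used here, 0 means "defined" and 1 means
   "undefined".  DifD is therefore just bitwise AND: a result bit is
   defined if it is defined in either argument.  Worked example:
   DifD8(0xF0, 0x0F) = 0x00, i.e. every result bit is considered
   defined because each bit is defined in at least one of the two
   arguments. */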
468 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
469    tl_assert(isShadowAtom(mce,a1));
470    tl_assert(isShadowAtom(mce,a2));
471    return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
472 }
473 
474 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
475    tl_assert(isShadowAtom(mce,a1));
476    tl_assert(isShadowAtom(mce,a2));
477    return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
478 }
479 
480 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
481    tl_assert(isShadowAtom(mce,a1));
482    tl_assert(isShadowAtom(mce,a2));
483    return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
484 }
485 
486 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
487    tl_assert(isShadowAtom(mce,a1));
488    tl_assert(isShadowAtom(mce,a2));
489    return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
490 }
491 
492 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
493    tl_assert(isShadowAtom(mce,a1));
494    tl_assert(isShadowAtom(mce,a2));
495    return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
496 }
497 
498 /* --------- Undefined-if-either-undefined --------- */
499 
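/* Dually, UifU is bitwise OR: a result bit is undefined if it is
   undefined in either argument.  Worked example: UifU8(0xF0, 0x0F) =
   0xFF -- every result bit is undefined because each bit is undefined
   in at least one of the two arguments. */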
500 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
501    tl_assert(isShadowAtom(mce,a1));
502    tl_assert(isShadowAtom(mce,a2));
503    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
504 }
505 
506 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
507    tl_assert(isShadowAtom(mce,a1));
508    tl_assert(isShadowAtom(mce,a2));
509    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
510 }
511 
512 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
513    tl_assert(isShadowAtom(mce,a1));
514    tl_assert(isShadowAtom(mce,a2));
515    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
516 }
517 
518 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
519    tl_assert(isShadowAtom(mce,a1));
520    tl_assert(isShadowAtom(mce,a2));
521    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
522 }
523 
524 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
525    IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
526    tl_assert(isShadowAtom(mce,a1));
527    tl_assert(isShadowAtom(mce,a2));
528    tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
529    tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
530    tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
531    tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
532    tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
533    tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
534 
535    return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
536 }
537 
538 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
539    tl_assert(isShadowAtom(mce,a1));
540    tl_assert(isShadowAtom(mce,a2));
541    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
542 }
543 
544 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
545    switch (vty) {
546       case Ity_I8:   return mkUifU8(mce, a1, a2);
547       case Ity_I16:  return mkUifU16(mce, a1, a2);
548       case Ity_I32:  return mkUifU32(mce, a1, a2);
549       case Ity_I64:  return mkUifU64(mce, a1, a2);
550       case Ity_I128: return mkUifU128(mce, a1, a2);
551       case Ity_V128: return mkUifUV128(mce, a1, a2);
552       default:
553          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
554          VG_(tool_panic)("memcheck:mkUifU");
555    }
556 }
557 
558 /* --------- The Left-family of operations. --------- */
559 
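/* The Left operations smear undefinedness leftwards: Iop_LeftN (see
   libvex_ir.h) computes x | -x, so every bit at or above the lowest
   1 bit of the shadow becomes 1 (undefined).  Worked example: mkLeft8
   of shadow 0x04 yields 0xFC -- bits 2..7 undefined, bits 0..1 still
   defined.  This (roughly) models how carries can propagate leftwards
   in additions and subtractions. */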
560 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
561    tl_assert(isShadowAtom(mce,a1));
562    return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
563 }
564 
565 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
566    tl_assert(isShadowAtom(mce,a1));
567    return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
568 }
569 
570 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
571    tl_assert(isShadowAtom(mce,a1));
572    return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
573 }
574 
575 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
576    tl_assert(isShadowAtom(mce,a1));
577    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
578 }
579 
580 /* --------- 'Improvement' functions for AND/OR. --------- */
581 
582 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
583    defined (0); all other -> undefined (1).
584 */
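/* Worked example: for AND, an operand bit which is a *defined 0*
   forces the corresponding result bit to 0 regardless of the other
   operand, so that result bit is defined.  With data = 0x0F and
   vbits = 0x00 (all defined), mkImproveAND8 returns 0x0F: the high
   nibble of the improvement term is 0, marking those result bits as
   defined no matter what; the low nibble (defined 1s) offers no such
   guarantee. */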
585 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
586 {
587    tl_assert(isOriginalAtom(mce, data));
588    tl_assert(isShadowAtom(mce, vbits));
589    tl_assert(sameKindedAtoms(data, vbits));
590    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
591 }
592 
593 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
594 {
595    tl_assert(isOriginalAtom(mce, data));
596    tl_assert(isShadowAtom(mce, vbits));
597    tl_assert(sameKindedAtoms(data, vbits));
598    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
599 }
600 
601 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
602 {
603    tl_assert(isOriginalAtom(mce, data));
604    tl_assert(isShadowAtom(mce, vbits));
605    tl_assert(sameKindedAtoms(data, vbits));
606    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
607 }
608 
609 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
610 {
611    tl_assert(isOriginalAtom(mce, data));
612    tl_assert(isShadowAtom(mce, vbits));
613    tl_assert(sameKindedAtoms(data, vbits));
614    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
615 }
616 
617 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
618 {
619    tl_assert(isOriginalAtom(mce, data));
620    tl_assert(isShadowAtom(mce, vbits));
621    tl_assert(sameKindedAtoms(data, vbits));
622    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
623 }
624 
625 /* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
626    defined (0); all other -> undefined (1).
627 */
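/* Symmetrically for OR: a *defined 1* in an operand forces the result
   bit to 1.  Worked example: with data = 0xF0 and vbits = 0x00,
   mkImproveOR8 returns ~0xF0 | 0x00 = 0x0F, so the high-nibble result
   bits are guaranteed defined by data's defined 1s. */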
628 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629 {
630    tl_assert(isOriginalAtom(mce, data));
631    tl_assert(isShadowAtom(mce, vbits));
632    tl_assert(sameKindedAtoms(data, vbits));
633    return assignNew(
634              'V', mce, Ity_I8,
635              binop(Iop_Or8,
636                    assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
637                    vbits) );
638 }
639 
640 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
641 {
642    tl_assert(isOriginalAtom(mce, data));
643    tl_assert(isShadowAtom(mce, vbits));
644    tl_assert(sameKindedAtoms(data, vbits));
645    return assignNew(
646              'V', mce, Ity_I16,
647              binop(Iop_Or16,
648                    assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
649                    vbits) );
650 }
651 
652 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653 {
654    tl_assert(isOriginalAtom(mce, data));
655    tl_assert(isShadowAtom(mce, vbits));
656    tl_assert(sameKindedAtoms(data, vbits));
657    return assignNew(
658              'V', mce, Ity_I32,
659              binop(Iop_Or32,
660                    assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
661                    vbits) );
662 }
663 
664 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
665 {
666    tl_assert(isOriginalAtom(mce, data));
667    tl_assert(isShadowAtom(mce, vbits));
668    tl_assert(sameKindedAtoms(data, vbits));
669    return assignNew(
670              'V', mce, Ity_I64,
671              binop(Iop_Or64,
672                    assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
673                    vbits) );
674 }
675 
676 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
677 {
678    tl_assert(isOriginalAtom(mce, data));
679    tl_assert(isShadowAtom(mce, vbits));
680    tl_assert(sameKindedAtoms(data, vbits));
681    return assignNew(
682              'V', mce, Ity_V128,
683              binop(Iop_OrV128,
684                    assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
685                    vbits) );
686 }
687 
688 /* --------- Pessimising casts. --------- */
689 
690 /* The function returns an expression of type DST_TY. If any of the VBITS
691    is undefined (value == 1) the resulting expression has all bits set to
692    1. Otherwise, all bits are 0. */
693 
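/* Worked example: pessimistically casting the I32 shadow 0x00000100
   (one undefined bit) to Ity_I64 yields 0xFFFFFFFFFFFFFFFF (entirely
   undefined), whereas casting 0x00000000 yields 0x0 (entirely
   defined).  The whole value is treated as undefined if any part of
   it is. */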
694 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
695 {
696    IRType  src_ty;
697    IRAtom* tmp1;
698    /* Note, dst_ty is a shadow type, not an original type. */
699    /* First of all, collapse vbits down to a single bit. */
700    tl_assert(isShadowAtom(mce,vbits));
701    src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
702 
703    /* Fast-track some common cases */
704    if (src_ty == Ity_I32 && dst_ty == Ity_I32)
705       return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
706 
707    if (src_ty == Ity_I64 && dst_ty == Ity_I64)
708       return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
709 
710    if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
711       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
712       return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
713    }
714 
715    /* Else do it the slow way .. */
716    tmp1   = NULL;
717    switch (src_ty) {
718       case Ity_I1:
719          tmp1 = vbits;
720          break;
721       case Ity_I8:
722          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
723          break;
724       case Ity_I16:
725          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
726          break;
727       case Ity_I32:
728          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
729          break;
730       case Ity_I64:
731          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
732          break;
733       case Ity_I128: {
734          /* Gah.  Chop it in half, OR the halves together, and compare
735             that with zero. */
736          IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
737          IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
738          IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
739          tmp1         = assignNew('V', mce, Ity_I1,
740                                        unop(Iop_CmpNEZ64, tmp4));
741          break;
742       }
743       default:
744          ppIRType(src_ty);
745          VG_(tool_panic)("mkPCastTo(1)");
746    }
747    tl_assert(tmp1);
748    /* Now widen up to the dst type. */
749    switch (dst_ty) {
750       case Ity_I1:
751          return tmp1;
752       case Ity_I8:
753          return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
754       case Ity_I16:
755          return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
756       case Ity_I32:
757          return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
758       case Ity_I64:
759          return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
760       case Ity_V128:
761          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
762          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
763          return tmp1;
764       case Ity_I128:
765          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
766          tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
767          return tmp1;
768       default:
769          ppIRType(dst_ty);
770          VG_(tool_panic)("mkPCastTo(2)");
771    }
772 }
773 
774 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
775 /*
776    Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
777    PCasting to Ity_U1.  However, sometimes it is necessary to be more
778    accurate.  The insight is that the result is defined if two
779    corresponding bits can be found, one from each argument, so that
780    both bits are defined but are different -- that makes EQ say "No"
781    and NE say "Yes".  Hence, we compute an improvement term and DifD
782    it onto the "normal" (UifU) result.
783 
784    The result is:
785 
786    PCastTo<1> (
787       -- naive version
788       PCastTo<sz>( UifU<sz>(vxx, vyy) )
789 
790       `DifD<sz>`
791 
792       -- improvement term
793       PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
794    )
795 
796    where
797      vec contains 0 (defined) bits where the corresponding arg bits
798      are defined but different, and 1 bits otherwise.
799 
800      vec = Or<sz>( vxx,   // 0 iff bit defined
801                    vyy,   // 0 iff bit defined
802                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
803                  )
804 
805      If any bit of vec is 0, the result is defined and so the
806      improvement term should produce 0...0, else it should produce
807      1...1.
808 
809      Hence require for the improvement term:
810 
811         if vec == 1...1 then 1...1 else 0...0
812      ->
813         PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
814 
815    This was extensively re-analysed and checked on 6 July 05.
816 */
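/* Worked example (32-bit, values chosen by hand): let xx = 5, yy = 7,
   and let only the low 4 bits of each be defined, so vxx = vyy =
   0xFFFFFFF0.  The naive term PCasts UifU(vxx,vyy) and says
   "undefined".  But Xor(xx,yy) = 2, so Not(Xor) = 0xFFFFFFFD, and
   vec = vxx | vyy | 0xFFFFFFFD = 0xFFFFFFFD != 1...1; hence the
   improvement term is 0...0 and the DifD forces the final result to
   "defined" -- correctly, because bit 1 is defined in both arguments
   and differs, which by itself decides EQ/NE. */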
817 static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
818                                     IRType  ty,
819                                     IRAtom* vxx, IRAtom* vyy,
820                                     IRAtom* xx,  IRAtom* yy )
821 {
822    IRAtom *naive, *vec, *improvement_term;
823    IRAtom *improved, *final_cast, *top;
824    IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
825 
826    tl_assert(isShadowAtom(mce,vxx));
827    tl_assert(isShadowAtom(mce,vyy));
828    tl_assert(isOriginalAtom(mce,xx));
829    tl_assert(isOriginalAtom(mce,yy));
830    tl_assert(sameKindedAtoms(vxx,xx));
831    tl_assert(sameKindedAtoms(vyy,yy));
832 
833    switch (ty) {
834       case Ity_I32:
835          opOR   = Iop_Or32;
836          opDIFD = Iop_And32;
837          opUIFU = Iop_Or32;
838          opNOT  = Iop_Not32;
839          opXOR  = Iop_Xor32;
840          opCMP  = Iop_CmpEQ32;
841          top    = mkU32(0xFFFFFFFF);
842          break;
843       case Ity_I64:
844          opOR   = Iop_Or64;
845          opDIFD = Iop_And64;
846          opUIFU = Iop_Or64;
847          opNOT  = Iop_Not64;
848          opXOR  = Iop_Xor64;
849          opCMP  = Iop_CmpEQ64;
850          top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
851          break;
852       default:
853          VG_(tool_panic)("expensiveCmpEQorNE");
854    }
855 
856    naive
857       = mkPCastTo(mce,ty,
858                   assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
859 
860    vec
861       = assignNew(
862            'V', mce,ty,
863            binop( opOR,
864                   assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
865                   assignNew(
866                      'V', mce,ty,
867                      unop( opNOT,
868                            assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
869 
870    improvement_term
871       = mkPCastTo( mce,ty,
872                    assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
873 
874    improved
875       = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
876 
877    final_cast
878       = mkPCastTo( mce, Ity_I1, improved );
879 
880    return final_cast;
881 }
882 
883 
884 /* --------- Semi-accurate interpretation of CmpORD. --------- */
885 
886 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
887 
888       CmpORD32S(x,y) = 1<<3   if  x <s y
889                      = 1<<2   if  x >s y
890                      = 1<<1   if  x == y
891 
892    and similarly the unsigned variant.  The default interpretation is:
893 
894       CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
895                                   & (7<<1)
896 
897    The "& (7<<1)" reflects the fact that all result bits except 3,2,1
898    are zero and therefore defined (viz, zero).
899 
900    Also deal with a special case better:
901 
902       CmpORD32S(x,0)
903 
904    Here, bit 3 (LT) of the result is a copy of the top bit of x and
905    will be defined even if the rest of x isn't.  In which case we do:
906 
907       CmpORD32S#(x,x#,0,{impliedly 0}#)
908          = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
909            | (x# >>u 31) << 3      -- LT# = x#[31]
910 
911    Analogous handling for CmpORD64{S,U}.
912 */
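/* Worked example of the special case (a sketch): for CmpORD32S(x,0)
   where only the sign bit of x is defined, x# = 0x7FFFFFFF.  Then
   PCast(x#) & (3<<1) marks the GT and EQ result bits as undefined,
   while (x# >>u 31) << 3 = 0 marks the LT bit as defined -- as it
   should be, since LT depends only on x's (defined) sign bit. */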
913 static Bool isZeroU32 ( IRAtom* e )
914 {
915    return
916       toBool( e->tag == Iex_Const
917               && e->Iex.Const.con->tag == Ico_U32
918               && e->Iex.Const.con->Ico.U32 == 0 );
919 }
920 
921 static Bool isZeroU64 ( IRAtom* e )
922 {
923    return
924       toBool( e->tag == Iex_Const
925               && e->Iex.Const.con->tag == Ico_U64
926               && e->Iex.Const.con->Ico.U64 == 0 );
927 }
928 
929 static IRAtom* doCmpORD ( MCEnv*  mce,
930                           IROp    cmp_op,
931                           IRAtom* xxhash, IRAtom* yyhash,
932                           IRAtom* xx,     IRAtom* yy )
933 {
934    Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
935    Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
936    IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
937    IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
938    IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
939    IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
940    IRType ty     = m64 ? Ity_I64   : Ity_I32;
941    Int    width  = m64 ? 64        : 32;
942 
943    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
944 
945    IRAtom* threeLeft1 = NULL;
946    IRAtom* sevenLeft1 = NULL;
947 
948    tl_assert(isShadowAtom(mce,xxhash));
949    tl_assert(isShadowAtom(mce,yyhash));
950    tl_assert(isOriginalAtom(mce,xx));
951    tl_assert(isOriginalAtom(mce,yy));
952    tl_assert(sameKindedAtoms(xxhash,xx));
953    tl_assert(sameKindedAtoms(yyhash,yy));
954    tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
955              || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
956 
957    if (0) {
958       ppIROp(cmp_op); VG_(printf)(" ");
959       ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
960    }
961 
962    if (syned && isZero(yy)) {
963       /* fancy interpretation */
964       /* if yy is zero, then it must be fully defined (zero#). */
965       tl_assert(isZero(yyhash));
966       threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
967       return
968          binop(
969             opOR,
970             assignNew(
971                'V', mce,ty,
972                binop(
973                   opAND,
974                   mkPCastTo(mce,ty, xxhash),
975                   threeLeft1
976                )),
977             assignNew(
978                'V', mce,ty,
979                binop(
980                   opSHL,
981                   assignNew(
982                      'V', mce,ty,
983                      binop(opSHR, xxhash, mkU8(width-1))),
984                   mkU8(3)
985                ))
986          );
987    } else {
988       /* standard interpretation */
989       sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
990       return
991          binop(
992             opAND,
993             mkPCastTo( mce,ty,
994                        mkUifU(mce,ty, xxhash,yyhash)),
995             sevenLeft1
996          );
997    }
998 }
999 
1000 
1001 /*------------------------------------------------------------*/
1002 /*--- Emit a test and complaint if something is undefined. ---*/
1003 /*------------------------------------------------------------*/
1004 
1005 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1006 
1007 
1008 /* Set the annotations on a dirty helper to indicate that the stack
1009    pointer and the instruction pointer might be read.  This is the
1010    behaviour of all 'emit-a-complaint' style functions we might
1011    call. */
1012 
1013 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1014    di->nFxState = 2;
1015    di->fxState[0].fx     = Ifx_Read;
1016    di->fxState[0].offset = mce->layout->offset_SP;
1017    di->fxState[0].size   = mce->layout->sizeof_SP;
1018    di->fxState[1].fx     = Ifx_Read;
1019    di->fxState[1].offset = mce->layout->offset_IP;
1020    di->fxState[1].size   = mce->layout->sizeof_IP;
1021 }
1022 
1023 
1024 /* Check the supplied **original** atom for undefinedness, and emit a
1025    complaint if so.  Once that happens, mark it as defined.  This is
1026    possible because the atom is either a tmp or literal.  If it's a
1027    tmp, it will be shadowed by a tmp, and so we can set the shadow to
1028    be defined.  In fact as mentioned above, we will have to allocate a
1029    new tmp to carry the new 'defined' shadow value, and update the
1030    original->tmp mapping accordingly; we cannot simply assign a new
1031    value to an existing shadow tmp as this breaks SSAness -- resulting
1032    in the post-instrumentation sanity checker spluttering in disapproval.
1033 */
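/* Rough pseudo-IR sketch (not actual ppIRSB output) of what gets
   emitted for a 4-byte tmp t5 with shadow t5#:
      t9  := CmpNEZ32(t5#)            -- pessimistic cast to Ity_I1
      if (t9) call MC_(helperc_value_check4_fail_no_o)()
      t5#' := 0x0:I32                 -- fresh shadow tmp, now 'defined'
   where t5#' is the replacement shadow installed via newShadowTmpV. */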
1034 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1035 {
1036    IRAtom*  vatom;
1037    IRType   ty;
1038    Int      sz;
1039    IRDirty* di;
1040    IRAtom*  cond;
1041    IRAtom*  origin;
1042    void*    fn;
1043    HChar*   nm;
1044    IRExpr** args;
1045    Int      nargs;
1046 
1047    // Don't do V bit tests if we're not reporting undefined value errors.
1048    if (MC_(clo_mc_level) == 1)
1049       return;
1050 
1051    /* Since the original expression is atomic, there's no duplicated
1052       work generated by making multiple V-expressions for it.  So we
1053       don't really care about the possibility that someone else may
1054       also create a V-interpretation for it. */
1055    tl_assert(isOriginalAtom(mce, atom));
1056    vatom = expr2vbits( mce, atom );
1057    tl_assert(isShadowAtom(mce, vatom));
1058    tl_assert(sameKindedAtoms(atom, vatom));
1059 
1060    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1061 
1062    /* sz is only used for constructing the error message */
1063    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1064 
1065    cond = mkPCastTo( mce, Ity_I1, vatom );
1066    /* cond will be 0 if all defined, and 1 if any not defined. */
1067 
1068    /* Get the origin info for the value we are about to check.  At
1069       least, if we are doing origin tracking.  If not, use a dummy
1070       zero origin. */
1071    if (MC_(clo_mc_level) == 3) {
1072       origin = schemeE( mce, atom );
1073       if (mce->hWordTy == Ity_I64) {
1074          origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1075       }
1076    } else {
1077       origin = NULL;
1078    }
1079 
1080    fn    = NULL;
1081    nm    = NULL;
1082    args  = NULL;
1083    nargs = -1;
1084 
1085    switch (sz) {
1086       case 0:
1087          if (origin) {
1088             fn    = &MC_(helperc_value_check0_fail_w_o);
1089             nm    = "MC_(helperc_value_check0_fail_w_o)";
1090             args  = mkIRExprVec_1(origin);
1091             nargs = 1;
1092          } else {
1093             fn    = &MC_(helperc_value_check0_fail_no_o);
1094             nm    = "MC_(helperc_value_check0_fail_no_o)";
1095             args  = mkIRExprVec_0();
1096             nargs = 0;
1097          }
1098          break;
1099       case 1:
1100          if (origin) {
1101             fn    = &MC_(helperc_value_check1_fail_w_o);
1102             nm    = "MC_(helperc_value_check1_fail_w_o)";
1103             args  = mkIRExprVec_1(origin);
1104             nargs = 1;
1105          } else {
1106             fn    = &MC_(helperc_value_check1_fail_no_o);
1107             nm    = "MC_(helperc_value_check1_fail_no_o)";
1108             args  = mkIRExprVec_0();
1109             nargs = 0;
1110          }
1111          break;
1112       case 4:
1113          if (origin) {
1114             fn    = &MC_(helperc_value_check4_fail_w_o);
1115             nm    = "MC_(helperc_value_check4_fail_w_o)";
1116             args  = mkIRExprVec_1(origin);
1117             nargs = 1;
1118          } else {
1119             fn    = &MC_(helperc_value_check4_fail_no_o);
1120             nm    = "MC_(helperc_value_check4_fail_no_o)";
1121             args  = mkIRExprVec_0();
1122             nargs = 0;
1123          }
1124          break;
1125       case 8:
1126          if (origin) {
1127             fn    = &MC_(helperc_value_check8_fail_w_o);
1128             nm    = "MC_(helperc_value_check8_fail_w_o)";
1129             args  = mkIRExprVec_1(origin);
1130             nargs = 1;
1131          } else {
1132             fn    = &MC_(helperc_value_check8_fail_no_o);
1133             nm    = "MC_(helperc_value_check8_fail_no_o)";
1134             args  = mkIRExprVec_0();
1135             nargs = 0;
1136          }
1137          break;
1138       case 2:
1139       case 16:
1140          if (origin) {
1141             fn    = &MC_(helperc_value_checkN_fail_w_o);
1142             nm    = "MC_(helperc_value_checkN_fail_w_o)";
1143             args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1144             nargs = 2;
1145          } else {
1146             fn    = &MC_(helperc_value_checkN_fail_no_o);
1147             nm    = "MC_(helperc_value_checkN_fail_no_o)";
1148             args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1149             nargs = 1;
1150          }
1151          break;
1152       default:
1153          VG_(tool_panic)("unexpected szB");
1154    }
1155 
1156    tl_assert(fn);
1157    tl_assert(nm);
1158    tl_assert(args);
1159    tl_assert(nargs >= 0 && nargs <= 2);
1160    tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1161               || (MC_(clo_mc_level) == 2 && origin == NULL) );
1162 
1163    di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1164                            VG_(fnptr_to_fnentry)( fn ), args );
1165    di->guard = cond;
1166    setHelperAnns( mce, di );
1167    stmt( 'V', mce, IRStmt_Dirty(di));
1168 
1169    /* Set the shadow tmp to be defined.  First, update the
1170       orig->shadow tmp mapping to reflect the fact that this shadow is
1171       getting a new value. */
1172    tl_assert(isIRAtom(vatom));
1173    /* sameKindedAtoms ... */
1174    if (vatom->tag == Iex_RdTmp) {
1175       tl_assert(atom->tag == Iex_RdTmp);
1176       newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1177       assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1178                        definedOfType(ty));
1179    }
1180 }
1181 
1182 
1183 /*------------------------------------------------------------*/
1184 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1185 /*------------------------------------------------------------*/
1186 
1187 /* Examine the always-defined sections declared in layout to see if
1188    the (offset,size) section is within one.  Note, it is an error to
1189    partially fall into such a region: (offset,size) should either be
1190    completely in such a region or completely not-in such a region.
1191 */
1192 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1193 {
1194    Int minoffD, maxoffD, i;
1195    Int minoff = offset;
1196    Int maxoff = minoff + size - 1;
1197    tl_assert((minoff & ~0xFFFF) == 0);
1198    tl_assert((maxoff & ~0xFFFF) == 0);
1199 
1200    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1201       minoffD = mce->layout->alwaysDefd[i].offset;
1202       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1203       tl_assert((minoffD & ~0xFFFF) == 0);
1204       tl_assert((maxoffD & ~0xFFFF) == 0);
1205 
1206       if (maxoff < minoffD || maxoffD < minoff)
1207          continue; /* no overlap */
1208       if (minoff >= minoffD && maxoff <= maxoffD)
1209          return True; /* completely contained in an always-defd section */
1210 
1211       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1212    }
1213    return False; /* could not find any containing section */
1214 }
1215 
1216 
1217 /* Generate into bb suitable actions to shadow this Put.  If the state
1218    slice is marked 'always defined', do nothing.  Otherwise, write the
1219    supplied V bits to the shadow state.  We can pass in either an
1220    original atom or a V-atom, but not both.  In the former case the
1221    relevant V-bits are then generated from the original.
1222 */
1223 static
1224 void do_shadow_PUT ( MCEnv* mce,  Int offset,
1225                      IRAtom* atom, IRAtom* vatom )
1226 {
1227    IRType ty;
1228 
1229    // Don't do shadow PUTs if we're not doing undefined value checking.
1230    // Their absence lets Vex's optimiser remove all the shadow computation
1231    // that they depend on, which includes GETs of the shadow registers.
1232    if (MC_(clo_mc_level) == 1)
1233       return;
1234 
1235    if (atom) {
1236       tl_assert(!vatom);
1237       tl_assert(isOriginalAtom(mce, atom));
1238       vatom = expr2vbits( mce, atom );
1239    } else {
1240       tl_assert(vatom);
1241       tl_assert(isShadowAtom(mce, vatom));
1242    }
1243 
1244    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1245    tl_assert(ty != Ity_I1);
1246    tl_assert(ty != Ity_I128);
1247    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1248       /* later: no ... */
1249       /* emit code to emit a complaint if any of the vbits are 1. */
1250       /* complainIfUndefined(mce, atom); */
1251    } else {
1252       /* Do a plain shadow Put. */
1253       stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1254    }
1255 }
1256 
1257 
1258 /* Generate into bb suitable actions to shadow this PutI.  If the
1259    state slice is marked 'always defined', do nothing.  Otherwise,
1260    write the supplied V bits to the shadow state. */
1261 static
1262 void do_shadow_PUTI ( MCEnv* mce,
1263                       IRRegArray* descr,
1264                       IRAtom* ix, Int bias, IRAtom* atom )
1265 {
1266    IRAtom* vatom;
1267    IRType  ty, tyS;
1268    Int     arrSize;
1269 
1270    // Don't do shadow PUTIs if we're not doing undefined value checking.
1271    // Their absence lets Vex's optimiser remove all the shadow computation
1272    // that they depend on, which includes GETIs of the shadow registers.
1273    if (MC_(clo_mc_level) == 1)
1274       return;
1275 
1276    tl_assert(isOriginalAtom(mce,atom));
1277    vatom = expr2vbits( mce, atom );
1278    tl_assert(sameKindedAtoms(atom, vatom));
1279    ty   = descr->elemTy;
1280    tyS  = shadowTypeV(ty);
1281    arrSize = descr->nElems * sizeofIRType(ty);
1282    tl_assert(ty != Ity_I1);
1283    tl_assert(isOriginalAtom(mce,ix));
1284    complainIfUndefined(mce,ix);
1285    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1286       /* later: no ... */
1287       /* emit code to emit a complaint if any of the vbits are 1. */
1288       /* complainIfUndefined(mce, atom); */
1289    } else {
1290       /* Do a cloned version of the Put that refers to the shadow
1291          area. */
1292       IRRegArray* new_descr
1293          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1294                          tyS, descr->nElems);
1295       stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
1296    }
1297 }
1298 
1299 
1300 /* Return an expression which contains the V bits corresponding to the
1301    given GET (passed in in pieces).
1302 */
1303 static
1304 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1305 {
1306    IRType tyS = shadowTypeV(ty);
1307    tl_assert(ty != Ity_I1);
1308    tl_assert(ty != Ity_I128);
1309    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1310       /* Always defined, return all zeroes of the relevant type */
1311       return definedOfType(tyS);
1312    } else {
1313       /* return a cloned version of the Get that refers to the shadow
1314          area. */
1315       /* FIXME: this isn't an atom! */
1316       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1317    }
1318 }
1319 
1320 
1321 /* Return an expression which contains the V bits corresponding to the
1322    given GETI (passed in in pieces).
1323 */
1324 static
1325 IRExpr* shadow_GETI ( MCEnv* mce,
1326                       IRRegArray* descr, IRAtom* ix, Int bias )
1327 {
1328    IRType ty   = descr->elemTy;
1329    IRType tyS  = shadowTypeV(ty);
1330    Int arrSize = descr->nElems * sizeofIRType(ty);
1331    tl_assert(ty != Ity_I1);
1332    tl_assert(isOriginalAtom(mce,ix));
1333    complainIfUndefined(mce,ix);
1334    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1335       /* Always defined, return all zeroes of the relevant type */
1336       return definedOfType(tyS);
1337    } else {
1338       /* return a cloned version of the Get that refers to the shadow
1339          area. */
1340       IRRegArray* new_descr
1341          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1342                          tyS, descr->nElems);
1343       return IRExpr_GetI( new_descr, ix, bias );
1344    }
1345 }
1346 
1347 
1348 /*------------------------------------------------------------*/
1349 /*--- Generating approximations for unknown operations,    ---*/
1350 /*--- using lazy-propagate semantics                       ---*/
1351 /*------------------------------------------------------------*/
1352 
1353 /* Lazy propagation of undefinedness from two values, resulting in the
1354    specified shadow type.
1355 */
1356 static
1357 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1358 {
1359    IRAtom* at;
1360    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1361    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1362    tl_assert(isShadowAtom(mce,va1));
1363    tl_assert(isShadowAtom(mce,va2));
1364 
1365    /* The general case is inefficient because PCast is an expensive
1366       operation.  Here are some special cases which use PCast only
1367       once rather than twice. */
1368 
1369    /* I64 x I64 -> I64 */
1370    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1371       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1372       at = mkUifU(mce, Ity_I64, va1, va2);
1373       at = mkPCastTo(mce, Ity_I64, at);
1374       return at;
1375    }
1376 
1377    /* I64 x I64 -> I32 */
1378    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1379       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1380       at = mkUifU(mce, Ity_I64, va1, va2);
1381       at = mkPCastTo(mce, Ity_I32, at);
1382       return at;
1383    }
1384 
1385    if (0) {
1386       VG_(printf)("mkLazy2 ");
1387       ppIRType(t1);
1388       VG_(printf)("_");
1389       ppIRType(t2);
1390       VG_(printf)("_");
1391       ppIRType(finalVty);
1392       VG_(printf)("\n");
1393    }
1394 
1395    /* General case: force everything via 32-bit intermediaries. */
1396    at = mkPCastTo(mce, Ity_I32, va1);
1397    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1398    at = mkPCastTo(mce, finalVty, at);
1399    return at;
1400 }
1401 
1402 
1403 /* 3-arg version of the above. */
1404 static
1405 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1406                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1407 {
1408    IRAtom* at;
1409    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1410    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1411    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1412    tl_assert(isShadowAtom(mce,va1));
1413    tl_assert(isShadowAtom(mce,va2));
1414    tl_assert(isShadowAtom(mce,va3));
1415 
1416    /* The general case is inefficient because PCast is an expensive
1417       operation.  Here are some special cases which use PCast only
1418       twice rather than three times. */
1419 
1420    /* I32 x I64 x I64 -> I64 */
1421    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1422    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1423        && finalVty == Ity_I64) {
1424       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1425       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1426          mode indication which is fully defined, this should get
1427          folded out later. */
1428       at = mkPCastTo(mce, Ity_I64, va1);
1429       /* Now fold in 2nd and 3rd args. */
1430       at = mkUifU(mce, Ity_I64, at, va2);
1431       at = mkUifU(mce, Ity_I64, at, va3);
1432       /* and PCast once again. */
1433       at = mkPCastTo(mce, Ity_I64, at);
1434       return at;
1435    }
1436 
1437    /* I32 x I64 x I64 -> I32 */
1438    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1439        && finalVty == Ity_I32) {
1440       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1441       at = mkPCastTo(mce, Ity_I64, va1);
1442       at = mkUifU(mce, Ity_I64, at, va2);
1443       at = mkUifU(mce, Ity_I64, at, va3);
1444       at = mkPCastTo(mce, Ity_I32, at);
1445       return at;
1446    }
1447 
1448    /* I32 x I32 x I32 -> I32 */
1449    /* 32-bit FP idiom, as (eg) happens on ARM */
1450    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1451        && finalVty == Ity_I32) {
1452       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1453       at = va1;
1454       at = mkUifU(mce, Ity_I32, at, va2);
1455       at = mkUifU(mce, Ity_I32, at, va3);
1456       at = mkPCastTo(mce, Ity_I32, at);
1457       return at;
1458    }
1459 
1460    /* I32 x I128 x I128 -> I128 */
1461    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1462    if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1463        && finalVty == Ity_I128) {
1464       if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1465       /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1466          mode indication which is fully defined, this should get
1467          folded out later. */
1468       at = mkPCastTo(mce, Ity_I128, va1);
1469       /* Now fold in 2nd and 3rd args. */
1470       at = mkUifU(mce, Ity_I128, at, va2);
1471       at = mkUifU(mce, Ity_I128, at, va3);
1472       /* and PCast once again. */
1473       at = mkPCastTo(mce, Ity_I128, at);
1474       return at;
1475    }
1476    if (1) {
1477       VG_(printf)("mkLazy3: ");
1478       ppIRType(t1);
1479       VG_(printf)(" x ");
1480       ppIRType(t2);
1481       VG_(printf)(" x ");
1482       ppIRType(t3);
1483       VG_(printf)(" -> ");
1484       ppIRType(finalVty);
1485       VG_(printf)("\n");
1486    }
1487 
1488    tl_assert(0);
1489    /* General case: force everything via 32-bit intermediaries. */
1490    /*
1491    at = mkPCastTo(mce, Ity_I32, va1);
1492    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1493    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1494    at = mkPCastTo(mce, finalVty, at);
1495    return at;
1496    */
1497 }
1498 
1499 
1500 /* 4-arg version of the above. */
1501 static
1502 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1503                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1504 {
1505    IRAtom* at;
1506    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1507    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1508    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1509    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1510    tl_assert(isShadowAtom(mce,va1));
1511    tl_assert(isShadowAtom(mce,va2));
1512    tl_assert(isShadowAtom(mce,va3));
1513    tl_assert(isShadowAtom(mce,va4));
1514 
1515    /* The general case is inefficient because PCast is an expensive
1516       operation.  Here are some special cases which use PCast only
1517       twice rather than four times. */
1518 
1519    /* I32 x I64 x I64 x I64 -> I64 */
1520    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1521    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1522        && finalVty == Ity_I64) {
1523       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1524       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1525          mode indication which is fully defined, this should get
1526          folded out later. */
1527       at = mkPCastTo(mce, Ity_I64, va1);
1528       /* Now fold in 2nd, 3rd, 4th args. */
1529       at = mkUifU(mce, Ity_I64, at, va2);
1530       at = mkUifU(mce, Ity_I64, at, va3);
1531       at = mkUifU(mce, Ity_I64, at, va4);
1532       /* and PCast once again. */
1533       at = mkPCastTo(mce, Ity_I64, at);
1534       return at;
1535    }
1536    /* I32 x I32 x I32 x I32 -> I32 */
1537    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1538    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1539        && finalVty == Ity_I32) {
1540       if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1541       at = va1;
1542       /* Now fold in 2nd, 3rd, 4th args. */
1543       at = mkUifU(mce, Ity_I32, at, va2);
1544       at = mkUifU(mce, Ity_I32, at, va3);
1545       at = mkUifU(mce, Ity_I32, at, va4);
1546       at = mkPCastTo(mce, Ity_I32, at);
1547       return at;
1548    }
1549 
1550    if (1) {
1551       VG_(printf)("mkLazy4: ");
1552       ppIRType(t1);
1553       VG_(printf)(" x ");
1554       ppIRType(t2);
1555       VG_(printf)(" x ");
1556       ppIRType(t3);
1557       VG_(printf)(" x ");
1558       ppIRType(t4);
1559       VG_(printf)(" -> ");
1560       ppIRType(finalVty);
1561       VG_(printf)("\n");
1562    }
1563 
1564    tl_assert(0);
1565 }
1566 
1567 
1568 /* Do the lazy propagation game from a null-terminated vector of
1569    atoms.  This is presumably the arguments to a helper call, so the
1570    IRCallee info is also supplied in order that we can know which
1571    arguments should be ignored (via the .mcx_mask field).
1572 */
1573 static
1574 IRAtom* mkLazyN ( MCEnv* mce,
1575                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1576 {
1577    Int     i;
1578    IRAtom* here;
1579    IRAtom* curr;
1580    IRType  mergeTy;
1581    Bool    mergeTy64 = True;
1582 
1583    /* Decide on the type of the merge intermediary.  If all relevant
1584       args are I64, then it's I64.  In all other circumstances, use
1585       I32. */
1586    for (i = 0; exprvec[i]; i++) {
1587       tl_assert(i < 32);
1588       tl_assert(isOriginalAtom(mce, exprvec[i]));
1589       if (cee->mcx_mask & (1<<i))
1590          continue;
1591       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1592          mergeTy64 = False;
1593    }
1594 
1595    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1596    curr    = definedOfType(mergeTy);
1597 
1598    for (i = 0; exprvec[i]; i++) {
1599       tl_assert(i < 32);
1600       tl_assert(isOriginalAtom(mce, exprvec[i]));
1601       /* Only take notice of this arg if the callee's mc-exclusion
1602          mask does not say it is to be excluded. */
1603       if (cee->mcx_mask & (1<<i)) {
1604          /* the arg is to be excluded from definedness checking.  Do
1605             nothing. */
1606          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1607       } else {
1608          /* calculate the arg's definedness, and pessimistically merge
1609             it in. */
1610          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1611          curr = mergeTy64
1612                    ? mkUifU64(mce, here, curr)
1613                    : mkUifU32(mce, here, curr);
1614       }
1615    }
1616    return mkPCastTo(mce, finalVtype, curr );
1617 }
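/* For instance, a callee whose mcx_mask is 5 (binary 101) has its
   args 0 and 2 excluded: their V bits are never folded into 'curr',
   so undefinedness in those arguments cannot propagate into the
   result of the call. */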
1618 
1619 
1620 /*------------------------------------------------------------*/
1621 /*--- Generating expensive sequences for exact carry-chain ---*/
1622 /*--- propagation in add/sub and related operations.       ---*/
1623 /*------------------------------------------------------------*/
1624 
1625 static
1626 IRAtom* expensiveAddSub ( MCEnv*  mce,
1627                           Bool    add,
1628                           IRType  ty,
1629                           IRAtom* qaa, IRAtom* qbb,
1630                           IRAtom* aa,  IRAtom* bb )
1631 {
1632    IRAtom *a_min, *b_min, *a_max, *b_max;
1633    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1634 
1635    tl_assert(isShadowAtom(mce,qaa));
1636    tl_assert(isShadowAtom(mce,qbb));
1637    tl_assert(isOriginalAtom(mce,aa));
1638    tl_assert(isOriginalAtom(mce,bb));
1639    tl_assert(sameKindedAtoms(qaa,aa));
1640    tl_assert(sameKindedAtoms(qbb,bb));
1641 
1642    switch (ty) {
1643       case Ity_I32:
1644          opAND = Iop_And32;
1645          opOR  = Iop_Or32;
1646          opXOR = Iop_Xor32;
1647          opNOT = Iop_Not32;
1648          opADD = Iop_Add32;
1649          opSUB = Iop_Sub32;
1650          break;
1651       case Ity_I64:
1652          opAND = Iop_And64;
1653          opOR  = Iop_Or64;
1654          opXOR = Iop_Xor64;
1655          opNOT = Iop_Not64;
1656          opADD = Iop_Add64;
1657          opSUB = Iop_Sub64;
1658          break;
1659       default:
1660          VG_(tool_panic)("expensiveAddSub");
1661    }
1662 
1663    // a_min = aa & ~qaa
1664    a_min = assignNew('V', mce,ty,
1665                      binop(opAND, aa,
1666                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
1667 
1668    // b_min = bb & ~qbb
1669    b_min = assignNew('V', mce,ty,
1670                      binop(opAND, bb,
1671                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
1672 
1673    // a_max = aa | qaa
1674    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1675 
1676    // b_max = bb | qbb
1677    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1678 
1679    if (add) {
1680       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1681       return
1682       assignNew('V', mce,ty,
1683          binop( opOR,
1684                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1685                 assignNew('V', mce,ty,
1686                    binop( opXOR,
1687                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1688                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1689                    )
1690                 )
1691          )
1692       );
1693    } else {
1694       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1695       return
1696       assignNew('V', mce,ty,
1697          binop( opOR,
1698                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1699                 assignNew('V', mce,ty,
1700                    binop( opXOR,
1701                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1702                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1703                    )
1704                 )
1705          )
1706       );
1707    }
1708 
1709 }
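/* Worked illustration of the 'add' case, with small Ity_I32 values:
   suppose aa == 3 with qaa == 1 (bit 0 undefined) and bb == 1 with
   qbb == 0 (fully defined).  Then
      a_min = aa & ~qaa = 2,  a_max = aa | qaa = 3,
      b_min = b_max = 1,
      (a_min + b_min) ^ (a_max + b_max) = 3 ^ 4 = 7,
   and OR-ing in (qaa | qbb) == 1 still gives 7.  Hence bits 0..2 of
   the sum are marked undefined: depending on the real value of the
   undefined bit 0, the sum is either 3 or 4, and those differ in
   exactly those bit positions. */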
1710 
1711 
1712 /*------------------------------------------------------------*/
1713 /*--- Scalar shifts.                                       ---*/
1714 /*------------------------------------------------------------*/
1715 
1716 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1717    idea is to shift the definedness bits by the original shift amount.
1718    This introduces 0s ("defined") in new positions for left shifts and
1719    unsigned right shifts, and copies the top definedness bit for
1720    signed right shifts.  So, conveniently, applying the original shift
1721    operator to the definedness bits for the left arg is exactly the
1722    right thing to do:
1723 
1724       (qaa << bb)
1725 
1726    However if the shift amount is undefined then the whole result
1727    is undefined.  Hence need:
1728 
1729       (qaa << bb) `UifU` PCast(qbb)
1730 
1731    If the shift amount bb is a literal then qbb will say 'all defined'
1732    and the UifU and PCast will get folded out by post-instrumentation
1733    optimisation.
1734 */
1735 static IRAtom* scalarShift ( MCEnv*  mce,
1736                              IRType  ty,
1737                              IROp    original_op,
1738                              IRAtom* qaa, IRAtom* qbb,
1739                              IRAtom* aa,  IRAtom* bb )
1740 {
1741    tl_assert(isShadowAtom(mce,qaa));
1742    tl_assert(isShadowAtom(mce,qbb));
1743    tl_assert(isOriginalAtom(mce,aa));
1744    tl_assert(isOriginalAtom(mce,bb));
1745    tl_assert(sameKindedAtoms(qaa,aa));
1746    tl_assert(sameKindedAtoms(qbb,bb));
1747    return
1748       assignNew(
1749          'V', mce, ty,
1750          mkUifU( mce, ty,
1751                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1752                  mkPCastTo(mce, ty, qbb)
1753          )
1754    );
1755 }
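/* Illustration: for Iop_Shl32 with qaa == 0x1 (only bit 0 of aa
   undefined) and a constant shift amount bb == 4, (qaa << bb) is
   0x10, so only bit 4 of the result is marked undefined -- exactly
   where the undefined input bit lands.  Since bb is a literal, qbb
   is all zeroes and the UifU with PCast(qbb) contributes nothing,
   and gets folded away later. */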
1756 
1757 
1758 /*------------------------------------------------------------*/
1759 /*--- Helpers for dealing with vector primops.             ---*/
1760 /*------------------------------------------------------------*/
1761 
1762 /* Vector pessimisation -- pessimise within each lane individually. */
1763 
1764 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1765 {
1766    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1767 }
1768 
1769 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1770 {
1771    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1772 }
1773 
1774 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1775 {
1776    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1777 }
1778 
1779 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1780 {
1781    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1782 }
1783 
1784 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1785 {
1786    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1787 }
1788 
1789 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1790 {
1791    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1792 }
1793 
1794 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1795 {
1796    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1797 }
1798 
1799 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1800 {
1801    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1802 }
1803 
1804 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1805 {
1806    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1807 }
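/* Per-lane pessimisation in a nutshell: CmpNEZ turns each lane of a
   V-bit vector into all-ones if that lane contains any undefined
   bit, and all-zeroes otherwise.  For example, mkPCast32x4 applied
   to V bits whose four 32-bit lanes are {0x0, 0x1, 0x80000000, 0x0}
   produces lanes {0x0, 0xFFFFFFFF, 0xFFFFFFFF, 0x0}. */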
1808 
1809 
1810 /* Here's a simple scheme capable of handling ops derived from SSE1
1811    code and while only generating ops that can be efficiently
1812    implemented in SSE1. */
1813 
1814 /* All-lanes versions are straightforward:
1815 
1816    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1817 
1818    unary32Fx4(x)      ==> PCast32x4(x#)
1819 
1820    Lowest-lane-only versions are more complex:
1821 
1822    binary32F0x4(x,y)  ==> SetV128lo32(
1823                              x#,
1824                              PCast32(V128to32(UifUV128(x#,y#)))
1825                           )
1826 
1827    This is perhaps not so obvious.  In particular, it's faster to
1828    do a V128-bit UifU and then take the bottom 32 bits than the more
1829    obvious scheme of taking the bottom 32 bits of each operand
1830    and doing a 32-bit UifU.  Basically since UifU is fast and
1831    chopping lanes off vector values is slow.
1832 
1833    Finally:
1834 
1835    unary32F0x4(x)     ==> SetV128lo32(
1836                              x#,
1837                              PCast32(V128to32(x#))
1838                           )
1839 
1840    Where:
1841 
1842    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1843    PCast32x4(v#) = CmpNEZ32x4(v#)
1844 */
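/* So, concretely, for binary32F0x4 below: lanes 1..3 of the result
   simply inherit x#'s definedness, whilst lane 0 becomes all-ones
   (undefined) if any of the bottom 32 bits of either x# or y# is
   undefined, and all-zeroes otherwise. */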
1845 
1846 static
1847 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1848 {
1849    IRAtom* at;
1850    tl_assert(isShadowAtom(mce, vatomX));
1851    tl_assert(isShadowAtom(mce, vatomY));
1852    at = mkUifUV128(mce, vatomX, vatomY);
1853    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1854    return at;
1855 }
1856 
1857 static
1858 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1859 {
1860    IRAtom* at;
1861    tl_assert(isShadowAtom(mce, vatomX));
1862    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1863    return at;
1864 }
1865 
1866 static
1867 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1868 {
1869    IRAtom* at;
1870    tl_assert(isShadowAtom(mce, vatomX));
1871    tl_assert(isShadowAtom(mce, vatomY));
1872    at = mkUifUV128(mce, vatomX, vatomY);
1873    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1874    at = mkPCastTo(mce, Ity_I32, at);
1875    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1876    return at;
1877 }
1878 
1879 static
1880 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1881 {
1882    IRAtom* at;
1883    tl_assert(isShadowAtom(mce, vatomX));
1884    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1885    at = mkPCastTo(mce, Ity_I32, at);
1886    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1887    return at;
1888 }
1889 
1890 /* --- ... and ... 64Fx2 versions of the same ... --- */
1891 
1892 static
1893 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1894 {
1895    IRAtom* at;
1896    tl_assert(isShadowAtom(mce, vatomX));
1897    tl_assert(isShadowAtom(mce, vatomY));
1898    at = mkUifUV128(mce, vatomX, vatomY);
1899    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1900    return at;
1901 }
1902 
1903 static
1904 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1905 {
1906    IRAtom* at;
1907    tl_assert(isShadowAtom(mce, vatomX));
1908    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
1909    return at;
1910 }
1911 
1912 static
1913 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1914 {
1915    IRAtom* at;
1916    tl_assert(isShadowAtom(mce, vatomX));
1917    tl_assert(isShadowAtom(mce, vatomY));
1918    at = mkUifUV128(mce, vatomX, vatomY);
1919    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
1920    at = mkPCastTo(mce, Ity_I64, at);
1921    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1922    return at;
1923 }
1924 
1925 static
1926 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1927 {
1928    IRAtom* at;
1929    tl_assert(isShadowAtom(mce, vatomX));
1930    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
1931    at = mkPCastTo(mce, Ity_I64, at);
1932    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1933    return at;
1934 }
1935 
1936 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
1937 
1938 static
1939 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1940 {
1941    IRAtom* at;
1942    tl_assert(isShadowAtom(mce, vatomX));
1943    tl_assert(isShadowAtom(mce, vatomY));
1944    at = mkUifU64(mce, vatomX, vatomY);
1945    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1946    return at;
1947 }
1948 
1949 static
1950 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1951 {
1952    IRAtom* at;
1953    tl_assert(isShadowAtom(mce, vatomX));
1954    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1955    return at;
1956 }
1957 
1958 /* --- --- Vector saturated narrowing --- --- */
1959 
1960 /* We used to do something very clever here, but on closer inspection
1961    (2011-Jun-15), and in particular bug #279698, it turns out to be
1962    wrong.  Part of the problem came from the fact that for a long
1963    time, the IR primops to do with saturated narrowing were
1964    underspecified and managed to confuse multiple cases which needed
1965    to be separate: the op names had a signedness qualifier, but in
1966    fact the source and destination signednesses needed to be specified
1967    independently, so the op names really need two independent
1968    signedness specifiers.
1969 
1970    As of 2011-Jun-15 (ish) the underspecification was sorted out
1971    properly.  The incorrect instrumentation remained, though.  That
1972    has now (2011-Oct-22) been fixed.
1973 
1974    What we now do is simple:
1975 
1976    Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
1977    number of lanes, X is the source lane width and signedness, and Y
1978    is the destination lane width and signedness.  In all cases the
1979    destination lane width is half the source lane width, so the names
1980    have a bit of redundancy, but are at least easy to read.
1981 
1982    For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
1983    to unsigned 16s.
1984 
1985    Let Vanilla(OP) be a function that takes OP, one of these
1986    saturating narrowing ops, and produces the same "shaped" narrowing
1987    op which is not saturating, but merely dumps the most significant
1988    bits.  "same shape" means that the lane numbers and widths are the
1989    same as with OP.
1990 
1991    For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
1992                   = Iop_NarrowBin32to16x8,
1993    that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
1994    dumping the top half of each lane.
1995 
1996    So, with that in place, the scheme is simple, and it is simple to
1997    pessimise each lane individually and then apply Vanilla(OP) so as
1998    to get the result in the right "shape".  If the original OP is
1999    QNarrowBinXtoYxZ then we produce
2000 
2001    Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2002 
2003    or for the case when OP is unary (Iop_QNarrowUn*)
2004 
2005    Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2006 */
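/* Concretely, for Iop_QNarrowBin32Sto16Ux8 the instrumentation
   below computes
      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) ):
   each 32-bit source lane is first pessimised to all-ones if it has
   any undefined bit, and the non-saturating narrow then produces a
   16-bit result lane which is either fully defined or fully
   undefined -- a safe, if slightly pessimistic, approximation. */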
2007 static
2008 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2009 {
2010    switch (qnarrowOp) {
2011       /* Binary: (128, 128) -> 128 */
2012       case Iop_QNarrowBin16Sto8Ux16:
2013       case Iop_QNarrowBin16Sto8Sx16:
2014       case Iop_QNarrowBin16Uto8Ux16:
2015          return Iop_NarrowBin16to8x16;
2016       case Iop_QNarrowBin32Sto16Ux8:
2017       case Iop_QNarrowBin32Sto16Sx8:
2018       case Iop_QNarrowBin32Uto16Ux8:
2019          return Iop_NarrowBin32to16x8;
2020       /* Binary: (64, 64) -> 64 */
2021       case Iop_QNarrowBin32Sto16Sx4:
2022          return Iop_NarrowBin32to16x4;
2023       case Iop_QNarrowBin16Sto8Ux8:
2024       case Iop_QNarrowBin16Sto8Sx8:
2025          return Iop_NarrowBin16to8x8;
2026       /* Unary: 128 -> 64 */
2027       case Iop_QNarrowUn64Uto32Ux2:
2028       case Iop_QNarrowUn64Sto32Sx2:
2029       case Iop_QNarrowUn64Sto32Ux2:
2030          return Iop_NarrowUn64to32x2;
2031       case Iop_QNarrowUn32Uto16Ux4:
2032       case Iop_QNarrowUn32Sto16Sx4:
2033       case Iop_QNarrowUn32Sto16Ux4:
2034          return Iop_NarrowUn32to16x4;
2035       case Iop_QNarrowUn16Uto8Ux8:
2036       case Iop_QNarrowUn16Sto8Sx8:
2037       case Iop_QNarrowUn16Sto8Ux8:
2038          return Iop_NarrowUn16to8x8;
2039       default:
2040          ppIROp(qnarrowOp);
2041          VG_(tool_panic)("vanillaNarrowOpOfShape");
2042    }
2043 }
2044 
2045 static
2046 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2047                               IRAtom* vatom1, IRAtom* vatom2)
2048 {
2049    IRAtom *at1, *at2, *at3;
2050    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2051    switch (narrow_op) {
2052       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2053       case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2054       case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2055       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2056       case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2057       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2058       default: VG_(tool_panic)("vectorNarrowBinV128");
2059    }
2060    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2061    tl_assert(isShadowAtom(mce,vatom1));
2062    tl_assert(isShadowAtom(mce,vatom2));
2063    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2064    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2065    at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2066    return at3;
2067 }
2068 
2069 static
2070 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2071                             IRAtom* vatom1, IRAtom* vatom2)
2072 {
2073    IRAtom *at1, *at2, *at3;
2074    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2075    switch (narrow_op) {
2076       case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2077       case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2078       case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2079       default: VG_(tool_panic)("vectorNarrowBin64");
2080    }
2081    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2082    tl_assert(isShadowAtom(mce,vatom1));
2083    tl_assert(isShadowAtom(mce,vatom2));
2084    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2085    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2086    at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2087    return at3;
2088 }
2089 
2090 static
2091 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2092                              IRAtom* vatom1)
2093 {
2094    IRAtom *at1, *at2;
2095    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2096    tl_assert(isShadowAtom(mce,vatom1));
2097    /* For vanilla narrowing (non-saturating), we can just apply
2098       the op directly to the V bits. */
2099    switch (narrow_op) {
2100       case Iop_NarrowUn16to8x8:
2101       case Iop_NarrowUn32to16x4:
2102       case Iop_NarrowUn64to32x2:
2103          at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2104          return at1;
2105       default:
2106          break; /* Do Plan B */
2107    }
2108    /* Plan B: for ops that involve a saturation operation on the args,
2109       we must PCast before the vanilla narrow. */
2110    switch (narrow_op) {
2111       case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2112       case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2113       case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2114       case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2115       case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2116       case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2117       case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2118       case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2119       case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2120       default: VG_(tool_panic)("vectorNarrowUnV128");
2121    }
2122    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2123    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2124    at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2125    return at2;
2126 }
2127 
2128 static
2129 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2130                          IRAtom* vatom1)
2131 {
2132    IRAtom *at1, *at2;
2133    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2134    switch (longen_op) {
2135       case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2136       case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2137       case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2138       case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2139       case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2140       case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2141       default: VG_(tool_panic)("vectorWidenI64");
2142    }
2143    tl_assert(isShadowAtom(mce,vatom1));
2144    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2145    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2146    return at2;
2147 }
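/* E.g. for Iop_Widen8Sto16x8: the widening op is first applied to
   the V bits themselves, so a partially undefined byte lane widens
   into a partially undefined 16-bit lane, and the following
   PCast16x8 then marks each such 16-bit lane as entirely undefined.
   Fully defined byte lanes stay fully defined throughout. */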
2148 
2149 
2150 /* --- --- Vector integer arithmetic --- --- */
2151 
2152 /* Simple ... UifU the args and per-lane pessimise the results. */
2153 
2154 /* --- V128-bit versions --- */
2155 
2156 static
2157 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2158 {
2159    IRAtom* at;
2160    at = mkUifUV128(mce, vatom1, vatom2);
2161    at = mkPCast8x16(mce, at);
2162    return at;
2163 }
2164 
2165 static
2166 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2167 {
2168    IRAtom* at;
2169    at = mkUifUV128(mce, vatom1, vatom2);
2170    at = mkPCast16x8(mce, at);
2171    return at;
2172 }
2173 
2174 static
2175 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2176 {
2177    IRAtom* at;
2178    at = mkUifUV128(mce, vatom1, vatom2);
2179    at = mkPCast32x4(mce, at);
2180    return at;
2181 }
2182 
2183 static
2184 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2185 {
2186    IRAtom* at;
2187    at = mkUifUV128(mce, vatom1, vatom2);
2188    at = mkPCast64x2(mce, at);
2189    return at;
2190 }
2191 
2192 /* --- 64-bit versions --- */
2193 
2194 static
2195 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2196 {
2197    IRAtom* at;
2198    at = mkUifU64(mce, vatom1, vatom2);
2199    at = mkPCast8x8(mce, at);
2200    return at;
2201 }
2202 
2203 static
2204 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2205 {
2206    IRAtom* at;
2207    at = mkUifU64(mce, vatom1, vatom2);
2208    at = mkPCast16x4(mce, at);
2209    return at;
2210 }
2211 
2212 static
2213 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2214 {
2215    IRAtom* at;
2216    at = mkUifU64(mce, vatom1, vatom2);
2217    at = mkPCast32x2(mce, at);
2218    return at;
2219 }
2220 
2221 static
2222 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2223 {
2224    IRAtom* at;
2225    at = mkUifU64(mce, vatom1, vatom2);
2226    at = mkPCastTo(mce, Ity_I64, at);
2227    return at;
2228 }
2229 
2230 /* --- 32-bit versions --- */
2231 
2232 static
2233 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2234 {
2235    IRAtom* at;
2236    at = mkUifU32(mce, vatom1, vatom2);
2237    at = mkPCast8x4(mce, at);
2238    return at;
2239 }
2240 
2241 static
2242 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2243 {
2244    IRAtom* at;
2245    at = mkUifU32(mce, vatom1, vatom2);
2246    at = mkPCast16x2(mce, at);
2247    return at;
2248 }
2249 
2250 
2251 /*------------------------------------------------------------*/
2252 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
2253 /*------------------------------------------------------------*/
2254 
2255 static
2256 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2257                          IROp op,
2258                          IRAtom* atom1, IRAtom* atom2,
2259                          IRAtom* atom3, IRAtom* atom4 )
2260 {
2261    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2262    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2263    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2264    IRAtom* vatom4 = expr2vbits( mce, atom4 );
2265 
2266    tl_assert(isOriginalAtom(mce,atom1));
2267    tl_assert(isOriginalAtom(mce,atom2));
2268    tl_assert(isOriginalAtom(mce,atom3));
2269    tl_assert(isOriginalAtom(mce,atom4));
2270    tl_assert(isShadowAtom(mce,vatom1));
2271    tl_assert(isShadowAtom(mce,vatom2));
2272    tl_assert(isShadowAtom(mce,vatom3));
2273    tl_assert(isShadowAtom(mce,vatom4));
2274    tl_assert(sameKindedAtoms(atom1,vatom1));
2275    tl_assert(sameKindedAtoms(atom2,vatom2));
2276    tl_assert(sameKindedAtoms(atom3,vatom3));
2277    tl_assert(sameKindedAtoms(atom4,vatom4));
2278    switch (op) {
2279       case Iop_MAddF64:
2280       case Iop_MAddF64r32:
2281       case Iop_MSubF64:
2282       case Iop_MSubF64r32:
2283          /* I32(rm) x F64 x F64 x F64 -> F64 */
2284          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2285 
2286       case Iop_MAddF32:
2287       case Iop_MSubF32:
2288          /* I32(rm) x F32 x F32 x F32 -> F32 */
2289          return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2290 
2291       default:
2292          ppIROp(op);
2293          VG_(tool_panic)("memcheck:expr2vbits_Qop");
2294    }
2295 }
2296 
2297 
2298 static
2299 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2300                            IROp op,
2301                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2302 {
2303    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2304    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2305    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2306 
2307    tl_assert(isOriginalAtom(mce,atom1));
2308    tl_assert(isOriginalAtom(mce,atom2));
2309    tl_assert(isOriginalAtom(mce,atom3));
2310    tl_assert(isShadowAtom(mce,vatom1));
2311    tl_assert(isShadowAtom(mce,vatom2));
2312    tl_assert(isShadowAtom(mce,vatom3));
2313    tl_assert(sameKindedAtoms(atom1,vatom1));
2314    tl_assert(sameKindedAtoms(atom2,vatom2));
2315    tl_assert(sameKindedAtoms(atom3,vatom3));
2316    switch (op) {
2317       case Iop_AddF128:
2318       case Iop_SubF128:
2319       case Iop_MulF128:
2320       case Iop_DivF128:
2321          /* I32(rm) x F128 x F128 -> F128 */
2322          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2323       case Iop_AddF64:
2324       case Iop_AddF64r32:
2325       case Iop_SubF64:
2326       case Iop_SubF64r32:
2327       case Iop_MulF64:
2328       case Iop_MulF64r32:
2329       case Iop_DivF64:
2330       case Iop_DivF64r32:
2331       case Iop_ScaleF64:
2332       case Iop_Yl2xF64:
2333       case Iop_Yl2xp1F64:
2334       case Iop_AtanF64:
2335       case Iop_PRemF64:
2336       case Iop_PRem1F64:
2337          /* I32(rm) x F64 x F64 -> F64 */
2338          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2339       case Iop_PRemC3210F64:
2340       case Iop_PRem1C3210F64:
2341          /* I32(rm) x F64 x F64 -> I32 */
2342          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2343       case Iop_AddF32:
2344       case Iop_SubF32:
2345       case Iop_MulF32:
2346       case Iop_DivF32:
2347          /* I32(rm) x F32 x F32 -> F32 */
2348          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2349       case Iop_ExtractV128:
2350          complainIfUndefined(mce, atom3);
2351          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2352       case Iop_Extract64:
2353          complainIfUndefined(mce, atom3);
2354          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2355       case Iop_SetElem8x8:
2356       case Iop_SetElem16x4:
2357       case Iop_SetElem32x2:
2358          complainIfUndefined(mce, atom2);
2359          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2360       default:
2361          ppIROp(op);
2362          VG_(tool_panic)("memcheck:expr2vbits_Triop");
2363    }
2364 }
2365 
2366 
2367 static
2368 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2369                            IROp op,
2370                            IRAtom* atom1, IRAtom* atom2 )
2371 {
2372    IRType  and_or_ty;
2373    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2374    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2375    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2376 
2377    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2378    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2379 
2380    tl_assert(isOriginalAtom(mce,atom1));
2381    tl_assert(isOriginalAtom(mce,atom2));
2382    tl_assert(isShadowAtom(mce,vatom1));
2383    tl_assert(isShadowAtom(mce,vatom2));
2384    tl_assert(sameKindedAtoms(atom1,vatom1));
2385    tl_assert(sameKindedAtoms(atom2,vatom2));
2386    switch (op) {
2387 
2388       /* 32-bit SIMD */
2389 
2390       case Iop_Add16x2:
2391       case Iop_HAdd16Ux2:
2392       case Iop_HAdd16Sx2:
2393       case Iop_Sub16x2:
2394       case Iop_HSub16Ux2:
2395       case Iop_HSub16Sx2:
2396       case Iop_QAdd16Sx2:
2397       case Iop_QSub16Sx2:
2398          return binary16Ix2(mce, vatom1, vatom2);
2399 
2400       case Iop_Add8x4:
2401       case Iop_HAdd8Ux4:
2402       case Iop_HAdd8Sx4:
2403       case Iop_Sub8x4:
2404       case Iop_HSub8Ux4:
2405       case Iop_HSub8Sx4:
2406       case Iop_QSub8Ux4:
2407       case Iop_QAdd8Ux4:
2408       case Iop_QSub8Sx4:
2409       case Iop_QAdd8Sx4:
2410          return binary8Ix4(mce, vatom1, vatom2);
2411 
2412       /* 64-bit SIMD */
2413 
2414       case Iop_ShrN8x8:
2415       case Iop_ShrN16x4:
2416       case Iop_ShrN32x2:
2417       case Iop_SarN8x8:
2418       case Iop_SarN16x4:
2419       case Iop_SarN32x2:
2420       case Iop_ShlN16x4:
2421       case Iop_ShlN32x2:
2422       case Iop_ShlN8x8:
2423          /* Same scheme as with all other shifts. */
2424          complainIfUndefined(mce, atom2);
2425          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2426 
2427       case Iop_QNarrowBin32Sto16Sx4:
2428       case Iop_QNarrowBin16Sto8Sx8:
2429       case Iop_QNarrowBin16Sto8Ux8:
2430          return vectorNarrowBin64(mce, op, vatom1, vatom2);
2431 
2432       case Iop_Min8Ux8:
2433       case Iop_Min8Sx8:
2434       case Iop_Max8Ux8:
2435       case Iop_Max8Sx8:
2436       case Iop_Avg8Ux8:
2437       case Iop_QSub8Sx8:
2438       case Iop_QSub8Ux8:
2439       case Iop_Sub8x8:
2440       case Iop_CmpGT8Sx8:
2441       case Iop_CmpGT8Ux8:
2442       case Iop_CmpEQ8x8:
2443       case Iop_QAdd8Sx8:
2444       case Iop_QAdd8Ux8:
2445       case Iop_QSal8x8:
2446       case Iop_QShl8x8:
2447       case Iop_Add8x8:
2448       case Iop_Mul8x8:
2449       case Iop_PolynomialMul8x8:
2450          return binary8Ix8(mce, vatom1, vatom2);
2451 
2452       case Iop_Min16Sx4:
2453       case Iop_Min16Ux4:
2454       case Iop_Max16Sx4:
2455       case Iop_Max16Ux4:
2456       case Iop_Avg16Ux4:
2457       case Iop_QSub16Ux4:
2458       case Iop_QSub16Sx4:
2459       case Iop_Sub16x4:
2460       case Iop_Mul16x4:
2461       case Iop_MulHi16Sx4:
2462       case Iop_MulHi16Ux4:
2463       case Iop_CmpGT16Sx4:
2464       case Iop_CmpGT16Ux4:
2465       case Iop_CmpEQ16x4:
2466       case Iop_QAdd16Sx4:
2467       case Iop_QAdd16Ux4:
2468       case Iop_QSal16x4:
2469       case Iop_QShl16x4:
2470       case Iop_Add16x4:
2471       case Iop_QDMulHi16Sx4:
2472       case Iop_QRDMulHi16Sx4:
2473          return binary16Ix4(mce, vatom1, vatom2);
2474 
2475       case Iop_Sub32x2:
2476       case Iop_Mul32x2:
2477       case Iop_Max32Sx2:
2478       case Iop_Max32Ux2:
2479       case Iop_Min32Sx2:
2480       case Iop_Min32Ux2:
2481       case Iop_CmpGT32Sx2:
2482       case Iop_CmpGT32Ux2:
2483       case Iop_CmpEQ32x2:
2484       case Iop_Add32x2:
2485       case Iop_QAdd32Ux2:
2486       case Iop_QAdd32Sx2:
2487       case Iop_QSub32Ux2:
2488       case Iop_QSub32Sx2:
2489       case Iop_QSal32x2:
2490       case Iop_QShl32x2:
2491       case Iop_QDMulHi32Sx2:
2492       case Iop_QRDMulHi32Sx2:
2493          return binary32Ix2(mce, vatom1, vatom2);
2494 
2495       case Iop_QSub64Ux1:
2496       case Iop_QSub64Sx1:
2497       case Iop_QAdd64Ux1:
2498       case Iop_QAdd64Sx1:
2499       case Iop_QSal64x1:
2500       case Iop_QShl64x1:
2501       case Iop_Sal64x1:
2502          return binary64Ix1(mce, vatom1, vatom2);
2503 
2504       case Iop_QShlN8Sx8:
2505       case Iop_QShlN8x8:
2506       case Iop_QSalN8x8:
2507          complainIfUndefined(mce, atom2);
2508          return mkPCast8x8(mce, vatom1);
2509 
2510       case Iop_QShlN16Sx4:
2511       case Iop_QShlN16x4:
2512       case Iop_QSalN16x4:
2513          complainIfUndefined(mce, atom2);
2514          return mkPCast16x4(mce, vatom1);
2515 
2516       case Iop_QShlN32Sx2:
2517       case Iop_QShlN32x2:
2518       case Iop_QSalN32x2:
2519          complainIfUndefined(mce, atom2);
2520          return mkPCast32x2(mce, vatom1);
2521 
2522       case Iop_QShlN64Sx1:
2523       case Iop_QShlN64x1:
2524       case Iop_QSalN64x1:
2525          complainIfUndefined(mce, atom2);
2526          return mkPCast32x2(mce, vatom1);
2527 
2528       case Iop_PwMax32Sx2:
2529       case Iop_PwMax32Ux2:
2530       case Iop_PwMin32Sx2:
2531       case Iop_PwMin32Ux2:
2532       case Iop_PwMax32Fx2:
2533       case Iop_PwMin32Fx2:
2534          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2535                      mkPCast32x2(mce, vatom2)));
2536 
2537       case Iop_PwMax16Sx4:
2538       case Iop_PwMax16Ux4:
2539       case Iop_PwMin16Sx4:
2540       case Iop_PwMin16Ux4:
2541          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2542                      mkPCast16x4(mce, vatom2)));
2543 
2544       case Iop_PwMax8Sx8:
2545       case Iop_PwMax8Ux8:
2546       case Iop_PwMin8Sx8:
2547       case Iop_PwMin8Ux8:
2548          return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2549                      mkPCast8x8(mce, vatom2)));
2550 
2551       case Iop_PwAdd32x2:
2552       case Iop_PwAdd32Fx2:
2553          return mkPCast32x2(mce,
2554                assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2555                      mkPCast32x2(mce, vatom2))));
2556 
2557       case Iop_PwAdd16x4:
2558          return mkPCast16x4(mce,
2559                assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2560                      mkPCast16x4(mce, vatom2))));
2561 
2562       case Iop_PwAdd8x8:
2563          return mkPCast8x8(mce,
2564                assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2565                      mkPCast8x8(mce, vatom2))));
2566 
2567       case Iop_Shl8x8:
2568       case Iop_Shr8x8:
2569       case Iop_Sar8x8:
2570       case Iop_Sal8x8:
2571          return mkUifU64(mce,
2572                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2573                    mkPCast8x8(mce,vatom2)
2574                 );
2575 
2576       case Iop_Shl16x4:
2577       case Iop_Shr16x4:
2578       case Iop_Sar16x4:
2579       case Iop_Sal16x4:
2580          return mkUifU64(mce,
2581                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2582                    mkPCast16x4(mce,vatom2)
2583                 );
2584 
2585       case Iop_Shl32x2:
2586       case Iop_Shr32x2:
2587       case Iop_Sar32x2:
2588       case Iop_Sal32x2:
2589          return mkUifU64(mce,
2590                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2591                    mkPCast32x2(mce,vatom2)
2592                 );
2593 
2594       /* 64-bit data-steering */
2595       case Iop_InterleaveLO32x2:
2596       case Iop_InterleaveLO16x4:
2597       case Iop_InterleaveLO8x8:
2598       case Iop_InterleaveHI32x2:
2599       case Iop_InterleaveHI16x4:
2600       case Iop_InterleaveHI8x8:
2601       case Iop_CatOddLanes8x8:
2602       case Iop_CatEvenLanes8x8:
2603       case Iop_CatOddLanes16x4:
2604       case Iop_CatEvenLanes16x4:
2605       case Iop_InterleaveOddLanes8x8:
2606       case Iop_InterleaveEvenLanes8x8:
2607       case Iop_InterleaveOddLanes16x4:
2608       case Iop_InterleaveEvenLanes16x4:
2609          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2610 
2611       case Iop_GetElem8x8:
2612          complainIfUndefined(mce, atom2);
2613          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2614       case Iop_GetElem16x4:
2615          complainIfUndefined(mce, atom2);
2616          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2617       case Iop_GetElem32x2:
2618          complainIfUndefined(mce, atom2);
2619          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2620 
2621       /* Perm8x8: rearrange values in left arg using steering values
2622         from right arg.  So rearrange the vbits in the same way but
2623         pessimise wrt steering values. */
2624       case Iop_Perm8x8:
2625          return mkUifU64(
2626                    mce,
2627                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2628                    mkPCast8x8(mce, vatom2)
2629                 );
2630 
2631       /* V128-bit SIMD */
2632 
2633       case Iop_ShrN8x16:
2634       case Iop_ShrN16x8:
2635       case Iop_ShrN32x4:
2636       case Iop_ShrN64x2:
2637       case Iop_SarN8x16:
2638       case Iop_SarN16x8:
2639       case Iop_SarN32x4:
2640       case Iop_SarN64x2:
2641       case Iop_ShlN8x16:
2642       case Iop_ShlN16x8:
2643       case Iop_ShlN32x4:
2644       case Iop_ShlN64x2:
2645          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2646             this is wrong now, scalar shifts are done properly lazily.
2647             Vector shifts should be fixed too. */
2648          complainIfUndefined(mce, atom2);
2649          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2650 
2651       /* V x V shifts/rotates are done using the standard lazy scheme. */
2652       case Iop_Shl8x16:
2653       case Iop_Shr8x16:
2654       case Iop_Sar8x16:
2655       case Iop_Sal8x16:
2656       case Iop_Rol8x16:
2657          return mkUifUV128(mce,
2658                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2659                    mkPCast8x16(mce,vatom2)
2660                 );
2661 
2662       case Iop_Shl16x8:
2663       case Iop_Shr16x8:
2664       case Iop_Sar16x8:
2665       case Iop_Sal16x8:
2666       case Iop_Rol16x8:
2667          return mkUifUV128(mce,
2668                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2669                    mkPCast16x8(mce,vatom2)
2670                 );
2671 
2672       case Iop_Shl32x4:
2673       case Iop_Shr32x4:
2674       case Iop_Sar32x4:
2675       case Iop_Sal32x4:
2676       case Iop_Rol32x4:
2677          return mkUifUV128(mce,
2678                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2679                    mkPCast32x4(mce,vatom2)
2680                 );
2681 
2682       case Iop_Shl64x2:
2683       case Iop_Shr64x2:
2684       case Iop_Sar64x2:
2685       case Iop_Sal64x2:
2686          return mkUifUV128(mce,
2687                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2688                    mkPCast64x2(mce,vatom2)
2689                 );
2690 
2691       case Iop_F32ToFixed32Ux4_RZ:
2692       case Iop_F32ToFixed32Sx4_RZ:
2693       case Iop_Fixed32UToF32x4_RN:
2694       case Iop_Fixed32SToF32x4_RN:
2695          complainIfUndefined(mce, atom2);
2696          return mkPCast32x4(mce, vatom1);
2697 
2698       case Iop_F32ToFixed32Ux2_RZ:
2699       case Iop_F32ToFixed32Sx2_RZ:
2700       case Iop_Fixed32UToF32x2_RN:
2701       case Iop_Fixed32SToF32x2_RN:
2702          complainIfUndefined(mce, atom2);
2703          return mkPCast32x2(mce, vatom1);
2704 
2705       case Iop_QSub8Ux16:
2706       case Iop_QSub8Sx16:
2707       case Iop_Sub8x16:
2708       case Iop_Min8Ux16:
2709       case Iop_Min8Sx16:
2710       case Iop_Max8Ux16:
2711       case Iop_Max8Sx16:
2712       case Iop_CmpGT8Sx16:
2713       case Iop_CmpGT8Ux16:
2714       case Iop_CmpEQ8x16:
2715       case Iop_Avg8Ux16:
2716       case Iop_Avg8Sx16:
2717       case Iop_QAdd8Ux16:
2718       case Iop_QAdd8Sx16:
2719       case Iop_QSal8x16:
2720       case Iop_QShl8x16:
2721       case Iop_Add8x16:
2722       case Iop_Mul8x16:
2723       case Iop_PolynomialMul8x16:
2724          return binary8Ix16(mce, vatom1, vatom2);
2725 
2726       case Iop_QSub16Ux8:
2727       case Iop_QSub16Sx8:
2728       case Iop_Sub16x8:
2729       case Iop_Mul16x8:
2730       case Iop_MulHi16Sx8:
2731       case Iop_MulHi16Ux8:
2732       case Iop_Min16Sx8:
2733       case Iop_Min16Ux8:
2734       case Iop_Max16Sx8:
2735       case Iop_Max16Ux8:
2736       case Iop_CmpGT16Sx8:
2737       case Iop_CmpGT16Ux8:
2738       case Iop_CmpEQ16x8:
2739       case Iop_Avg16Ux8:
2740       case Iop_Avg16Sx8:
2741       case Iop_QAdd16Ux8:
2742       case Iop_QAdd16Sx8:
2743       case Iop_QSal16x8:
2744       case Iop_QShl16x8:
2745       case Iop_Add16x8:
2746       case Iop_QDMulHi16Sx8:
2747       case Iop_QRDMulHi16Sx8:
2748          return binary16Ix8(mce, vatom1, vatom2);
2749 
2750       case Iop_Sub32x4:
2751       case Iop_CmpGT32Sx4:
2752       case Iop_CmpGT32Ux4:
2753       case Iop_CmpEQ32x4:
2754       case Iop_QAdd32Sx4:
2755       case Iop_QAdd32Ux4:
2756       case Iop_QSub32Sx4:
2757       case Iop_QSub32Ux4:
2758       case Iop_QSal32x4:
2759       case Iop_QShl32x4:
2760       case Iop_Avg32Ux4:
2761       case Iop_Avg32Sx4:
2762       case Iop_Add32x4:
2763       case Iop_Max32Ux4:
2764       case Iop_Max32Sx4:
2765       case Iop_Min32Ux4:
2766       case Iop_Min32Sx4:
2767       case Iop_Mul32x4:
2768       case Iop_QDMulHi32Sx4:
2769       case Iop_QRDMulHi32Sx4:
2770          return binary32Ix4(mce, vatom1, vatom2);
2771 
2772       case Iop_Sub64x2:
2773       case Iop_Add64x2:
2774       case Iop_CmpEQ64x2:
2775       case Iop_CmpGT64Sx2:
2776       case Iop_QSal64x2:
2777       case Iop_QShl64x2:
2778       case Iop_QAdd64Ux2:
2779       case Iop_QAdd64Sx2:
2780       case Iop_QSub64Ux2:
2781       case Iop_QSub64Sx2:
2782          return binary64Ix2(mce, vatom1, vatom2);
2783 
2784       case Iop_QNarrowBin32Sto16Sx8:
2785       case Iop_QNarrowBin32Uto16Ux8:
2786       case Iop_QNarrowBin32Sto16Ux8:
2787       case Iop_QNarrowBin16Sto8Sx16:
2788       case Iop_QNarrowBin16Uto8Ux16:
2789       case Iop_QNarrowBin16Sto8Ux16:
2790          return vectorNarrowBinV128(mce, op, vatom1, vatom2);
2791 
2792       case Iop_Sub64Fx2:
2793       case Iop_Mul64Fx2:
2794       case Iop_Min64Fx2:
2795       case Iop_Max64Fx2:
2796       case Iop_Div64Fx2:
2797       case Iop_CmpLT64Fx2:
2798       case Iop_CmpLE64Fx2:
2799       case Iop_CmpEQ64Fx2:
2800       case Iop_CmpUN64Fx2:
2801       case Iop_Add64Fx2:
2802          return binary64Fx2(mce, vatom1, vatom2);
2803 
2804       case Iop_Sub64F0x2:
2805       case Iop_Mul64F0x2:
2806       case Iop_Min64F0x2:
2807       case Iop_Max64F0x2:
2808       case Iop_Div64F0x2:
2809       case Iop_CmpLT64F0x2:
2810       case Iop_CmpLE64F0x2:
2811       case Iop_CmpEQ64F0x2:
2812       case Iop_CmpUN64F0x2:
2813       case Iop_Add64F0x2:
2814          return binary64F0x2(mce, vatom1, vatom2);
2815 
2816       case Iop_Sub32Fx4:
2817       case Iop_Mul32Fx4:
2818       case Iop_Min32Fx4:
2819       case Iop_Max32Fx4:
2820       case Iop_Div32Fx4:
2821       case Iop_CmpLT32Fx4:
2822       case Iop_CmpLE32Fx4:
2823       case Iop_CmpEQ32Fx4:
2824       case Iop_CmpUN32Fx4:
2825       case Iop_CmpGT32Fx4:
2826       case Iop_CmpGE32Fx4:
2827       case Iop_Add32Fx4:
2828       case Iop_Recps32Fx4:
2829       case Iop_Rsqrts32Fx4:
2830          return binary32Fx4(mce, vatom1, vatom2);
2831 
2832       case Iop_Sub32Fx2:
2833       case Iop_Mul32Fx2:
2834       case Iop_Min32Fx2:
2835       case Iop_Max32Fx2:
2836       case Iop_CmpEQ32Fx2:
2837       case Iop_CmpGT32Fx2:
2838       case Iop_CmpGE32Fx2:
2839       case Iop_Add32Fx2:
2840       case Iop_Recps32Fx2:
2841       case Iop_Rsqrts32Fx2:
2842          return binary32Fx2(mce, vatom1, vatom2);
2843 
2844       case Iop_Sub32F0x4:
2845       case Iop_Mul32F0x4:
2846       case Iop_Min32F0x4:
2847       case Iop_Max32F0x4:
2848       case Iop_Div32F0x4:
2849       case Iop_CmpLT32F0x4:
2850       case Iop_CmpLE32F0x4:
2851       case Iop_CmpEQ32F0x4:
2852       case Iop_CmpUN32F0x4:
2853       case Iop_Add32F0x4:
2854          return binary32F0x4(mce, vatom1, vatom2);
2855 
2856       case Iop_QShlN8Sx16:
2857       case Iop_QShlN8x16:
2858       case Iop_QSalN8x16:
2859          complainIfUndefined(mce, atom2);
2860          return mkPCast8x16(mce, vatom1);
2861 
2862       case Iop_QShlN16Sx8:
2863       case Iop_QShlN16x8:
2864       case Iop_QSalN16x8:
2865          complainIfUndefined(mce, atom2);
2866          return mkPCast16x8(mce, vatom1);
2867 
2868       case Iop_QShlN32Sx4:
2869       case Iop_QShlN32x4:
2870       case Iop_QSalN32x4:
2871          complainIfUndefined(mce, atom2);
2872          return mkPCast32x4(mce, vatom1);
2873 
2874       case Iop_QShlN64Sx2:
2875       case Iop_QShlN64x2:
2876       case Iop_QSalN64x2:
2877          complainIfUndefined(mce, atom2);
2878          return mkPCast32x4(mce, vatom1);
2879 
2880       case Iop_Mull32Sx2:
2881       case Iop_Mull32Ux2:
2882       case Iop_QDMulLong32Sx2:
2883          return vectorWidenI64(mce, Iop_Widen32Sto64x2,
2884                                     mkUifU64(mce, vatom1, vatom2));
2885 
2886       case Iop_Mull16Sx4:
2887       case Iop_Mull16Ux4:
2888       case Iop_QDMulLong16Sx4:
2889          return vectorWidenI64(mce, Iop_Widen16Sto32x4,
2890                                     mkUifU64(mce, vatom1, vatom2));
2891 
2892       case Iop_Mull8Sx8:
2893       case Iop_Mull8Ux8:
2894       case Iop_PolynomialMull8x8:
2895          return vectorWidenI64(mce, Iop_Widen8Sto16x8,
2896                                     mkUifU64(mce, vatom1, vatom2));
2897 
2898       case Iop_PwAdd32x4:
2899          return mkPCast32x4(mce,
2900                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2901                      mkPCast32x4(mce, vatom2))));
2902 
2903       case Iop_PwAdd16x8:
2904          return mkPCast16x8(mce,
2905                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2906                      mkPCast16x8(mce, vatom2))));
2907 
2908       case Iop_PwAdd8x16:
2909          return mkPCast8x16(mce,
2910                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2911                      mkPCast8x16(mce, vatom2))));
2912 
2913       /* V128-bit data-steering */
2914       case Iop_SetV128lo32:
2915       case Iop_SetV128lo64:
2916       case Iop_64HLtoV128:
2917       case Iop_InterleaveLO64x2:
2918       case Iop_InterleaveLO32x4:
2919       case Iop_InterleaveLO16x8:
2920       case Iop_InterleaveLO8x16:
2921       case Iop_InterleaveHI64x2:
2922       case Iop_InterleaveHI32x4:
2923       case Iop_InterleaveHI16x8:
2924       case Iop_InterleaveHI8x16:
2925       case Iop_CatOddLanes8x16:
2926       case Iop_CatOddLanes16x8:
2927       case Iop_CatOddLanes32x4:
2928       case Iop_CatEvenLanes8x16:
2929       case Iop_CatEvenLanes16x8:
2930       case Iop_CatEvenLanes32x4:
2931       case Iop_InterleaveOddLanes8x16:
2932       case Iop_InterleaveOddLanes16x8:
2933       case Iop_InterleaveOddLanes32x4:
2934       case Iop_InterleaveEvenLanes8x16:
2935       case Iop_InterleaveEvenLanes16x8:
2936       case Iop_InterleaveEvenLanes32x4:
2937          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
2938 
2939       case Iop_GetElem8x16:
2940          complainIfUndefined(mce, atom2);
2941          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2942       case Iop_GetElem16x8:
2943          complainIfUndefined(mce, atom2);
2944          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2945       case Iop_GetElem32x4:
2946          complainIfUndefined(mce, atom2);
2947          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2948       case Iop_GetElem64x2:
2949          complainIfUndefined(mce, atom2);
2950          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2951 
2952      /* Perm8x16: rearrange values in left arg using steering values
2953         from right arg.  So rearrange the vbits in the same way but
2954         pessimise wrt steering values. */
2955       case Iop_Perm8x16:
2956          return mkUifUV128(
2957                    mce,
2958                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2959                    mkPCast8x16(mce, vatom2)
2960                 );
2961 
2962      /* These two take the lower half of each 16-bit lane, sign/zero
2963         extend it to 32, and multiply together, producing a 32x4
2964         result (and implicitly ignoring half the operand bits).  So
2965         treat it as a bunch of independent 16x8 operations, but then
2966         do 32-bit shifts left-right to copy the lower half results
2967         (which are all 0s or all 1s due to PCasting in binary16Ix8)
2968         into the upper half of each result lane. */
2969       case Iop_MullEven16Ux8:
2970       case Iop_MullEven16Sx8: {
2971          IRAtom* at;
2972          at = binary16Ix8(mce,vatom1,vatom2);
2973          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2974          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
2975          return at;
2976       }
2977 
2978       /* Same deal as Iop_MullEven16{S,U}x8 */
2979       case Iop_MullEven8Ux16:
2980       case Iop_MullEven8Sx16: {
2981          IRAtom* at;
2982          at = binary8Ix16(mce,vatom1,vatom2);
2983          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2984          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
2985          return at;
2986       }
2987 
2988       /* Narrow 2 x V128 into 1 x V128, hi half from the left arg, in a
2989          2 x 32x4 -> 16x8 (or 2 x 16x8 -> 8x16) laneage, discarding the
2990          upper half of each source lane.  Simply apply the same op to the
2991          V bits, since this is really no more than a data-steering op. */
2992       case Iop_NarrowBin32to16x8:
2993       case Iop_NarrowBin16to8x16:
2994          return assignNew('V', mce, Ity_V128,
2995                                     binop(op, vatom1, vatom2));
2996 
2997       case Iop_ShrV128:
2998       case Iop_ShlV128:
2999          /* Same scheme as with all other shifts.  Note: 10 Nov 05:
3000             this is wrong now, scalar shifts are done properly lazily.
3001             Vector shifts should be fixed too. */
3002          complainIfUndefined(mce, atom2);
3003          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3004 
3005       /* I128-bit data-steering */
3006       case Iop_64HLto128:
3007          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3008 
3009       /* Scalar floating point */
3010 
3011       case Iop_F32toI64S:
3012          /* I32(rm) x F32 -> I64 */
3013          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3014 
3015       case Iop_I64StoF32:
3016          /* I32(rm) x I64 -> F32 */
3017          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3018 
3019       case Iop_RoundF64toInt:
3020       case Iop_RoundF64toF32:
3021       case Iop_F64toI64S:
3022       case Iop_F64toI64U:
3023       case Iop_I64StoF64:
3024       case Iop_I64UtoF64:
3025       case Iop_SinF64:
3026       case Iop_CosF64:
3027       case Iop_TanF64:
3028       case Iop_2xm1F64:
3029       case Iop_SqrtF64:
3030          /* I32(rm) x I64/F64 -> I64/F64 */
3031          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3032 
3033       case Iop_RoundF32toInt:
3034       case Iop_SqrtF32:
3035          /* I32(rm) x I32/F32 -> I32/F32 */
3036          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3037 
3038       case Iop_SqrtF128:
3039          /* I32(rm) x F128 -> F128 */
3040          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3041 
3042       case Iop_I32StoF32:
3043       case Iop_F32toI32S:
3044          /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3045          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3046 
3047       case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3048       case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3049          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3050 
3051       case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3052       case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3053          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3054 
3055       case Iop_F64HLtoF128:
3056          return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2));
3057 
3058       case Iop_F64toI32U:
3059       case Iop_F64toI32S:
3060       case Iop_F64toF32:
3061       case Iop_I64UtoF32:
3062          /* First arg is I32 (rounding mode), second is F64 or I64 (data). */
3063          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3064 
3065       case Iop_F64toI16S:
3066          /* First arg is I32 (rounding mode), second is F64 (data). */
3067          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3068 
3069       case Iop_CmpF32:
3070       case Iop_CmpF64:
3071       case Iop_CmpF128:
3072          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3073 
3074       /* non-FP after here */
3075 
3076       case Iop_DivModU64to32:
3077       case Iop_DivModS64to32:
3078          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3079 
3080       case Iop_DivModU128to64:
3081       case Iop_DivModS128to64:
3082          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3083 
3084       case Iop_16HLto32:
3085          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3086       case Iop_32HLto64:
3087          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3088 
3089       case Iop_DivModS64to64:
3090       case Iop_MullS64:
3091       case Iop_MullU64: {
3092          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3093          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3094          return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
3095       }
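      /* Note on the scheme above, which is repeated for the 32-, 16- and
         8-bit widening multiplies below: the low half of the shadow
         result is Left(UifU(vatom1, vatom2)), where Left(x) = x | -x as
         defined earlier in this file, so undefinedness is smeared from
         the lowest undefined bit up to the MSB; the high half is a
         pessimistic PCast of that, so any undefined input bit marks the
         whole high half of the product as undefined. */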
3096 
3097       case Iop_MullS32:
3098       case Iop_MullU32: {
3099          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3100          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3101          return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
3102       }
3103 
3104       case Iop_MullS16:
3105       case Iop_MullU16: {
3106          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3107          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3108          return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
3109       }
3110 
3111       case Iop_MullS8:
3112       case Iop_MullU8: {
3113          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3114          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3115          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3116       }
3117 
3118       case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
3119       case Iop_DivS32:
3120       case Iop_DivU32:
3121       case Iop_DivU32E:
3122       case Iop_DivS32E:
3123          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3124 
3125       case Iop_DivS64:
3126       case Iop_DivU64:
3127       case Iop_DivS64E:
3128       case Iop_DivU64E:
3129          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3130 
3131       case Iop_Add32:
3132          if (mce->bogusLiterals)
3133             return expensiveAddSub(mce,True,Ity_I32,
3134                                    vatom1,vatom2, atom1,atom2);
3135          else
3136             goto cheap_AddSub32;
3137       case Iop_Sub32:
3138          if (mce->bogusLiterals)
3139             return expensiveAddSub(mce,False,Ity_I32,
3140                                    vatom1,vatom2, atom1,atom2);
3141          else
3142             goto cheap_AddSub32;
3143 
3144       cheap_AddSub32:
3145       case Iop_Mul32:
3146          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3147 
3148       case Iop_CmpORD32S:
3149       case Iop_CmpORD32U:
3150       case Iop_CmpORD64S:
3151       case Iop_CmpORD64U:
3152          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3153 
3154       case Iop_Add64:
3155          if (mce->bogusLiterals)
3156             return expensiveAddSub(mce,True,Ity_I64,
3157                                    vatom1,vatom2, atom1,atom2);
3158          else
3159             goto cheap_AddSub64;
3160       case Iop_Sub64:
3161          if (mce->bogusLiterals)
3162             return expensiveAddSub(mce,False,Ity_I64,
3163                                    vatom1,vatom2, atom1,atom2);
3164          else
3165             goto cheap_AddSub64;
3166 
3167       cheap_AddSub64:
3168       case Iop_Mul64:
3169          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3170 
3171       case Iop_Mul16:
3172       case Iop_Add16:
3173       case Iop_Sub16:
3174          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3175 
3176       case Iop_Sub8:
3177       case Iop_Add8:
3178          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3179 
3180       case Iop_CmpEQ64:
3181       case Iop_CmpNE64:
3182          if (mce->bogusLiterals)
3183             return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3184          else
3185             goto cheap_cmp64;
3186       cheap_cmp64:
3187       case Iop_CmpLE64S: case Iop_CmpLE64U:
3188       case Iop_CmpLT64U: case Iop_CmpLT64S:
3189          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3190 
3191       case Iop_CmpEQ32:
3192       case Iop_CmpNE32:
3193          if (mce->bogusLiterals)
3194             return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3195          else
3196             goto cheap_cmp32;
3197       cheap_cmp32:
3198       case Iop_CmpLE32S: case Iop_CmpLE32U:
3199       case Iop_CmpLT32U: case Iop_CmpLT32S:
3200          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3201 
3202       case Iop_CmpEQ16: case Iop_CmpNE16:
3203          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3204 
3205       case Iop_CmpEQ8: case Iop_CmpNE8:
3206          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3207 
3208       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3209       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3210       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3211       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3212          /* Just say these all produce a defined result, regardless
3213             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3214          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3215 
3216       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3217          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3218 
3219       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3220          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3221 
3222       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3223          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3224 
3225       case Iop_Shl8: case Iop_Shr8:
3226          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3227 
3228       case Iop_AndV128:
3229          uifu = mkUifUV128; difd = mkDifDV128;
3230          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3231       case Iop_And64:
3232          uifu = mkUifU64; difd = mkDifD64;
3233          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3234       case Iop_And32:
3235          uifu = mkUifU32; difd = mkDifD32;
3236          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3237       case Iop_And16:
3238          uifu = mkUifU16; difd = mkDifD16;
3239          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3240       case Iop_And8:
3241          uifu = mkUifU8; difd = mkDifD8;
3242          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3243 
3244       case Iop_OrV128:
3245          uifu = mkUifUV128; difd = mkDifDV128;
3246          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3247       case Iop_Or64:
3248          uifu = mkUifU64; difd = mkDifD64;
3249          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3250       case Iop_Or32:
3251          uifu = mkUifU32; difd = mkDifD32;
3252          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3253       case Iop_Or16:
3254          uifu = mkUifU16; difd = mkDifD16;
3255          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3256       case Iop_Or8:
3257          uifu = mkUifU8; difd = mkDifD8;
3258          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3259 
3260       do_And_Or:
3261          return
3262          assignNew(
3263             'V', mce,
3264             and_or_ty,
3265             difd(mce, uifu(mce, vatom1, vatom2),
3266                       difd(mce, improve(mce, atom1, vatom1),
3267                                 improve(mce, atom2, vatom2) ) ) );
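      /* Illustrative sketch of the And/Or "improvement" term above,
         assuming the definitions earlier in this file: V bit 1 means
         undefined, UifU is bitwise-OR and DifD bitwise-AND of V bits,
         improve for AND is (data | vbits) and for OR is (~data | vbits).
         For an 8-bit AND with atom1 = 0x0F fully defined (vatom1 = 0x00)
         and atom2 entirely undefined (vatom2 = 0xFF):

            uifu(v1, v2)        = 0xFF
            improve(a1, v1)     = a1 | v1 = 0x0F
            improve(a2, v2)     = a2 | v2 = 0xFF  (whatever a2 is)
            difd(0x0F, 0xFF)    = 0x0F
            difd(0xFF, 0x0F)    = 0x0F            <- final shadow

         So the top four result bits are reported as defined, because the
         defined zero bits of atom1 force them to zero regardless of
         atom2.  The OR case is symmetrical, with defined one bits doing
         the forcing. */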
3268 
3269       case Iop_Xor8:
3270          return mkUifU8(mce, vatom1, vatom2);
3271       case Iop_Xor16:
3272          return mkUifU16(mce, vatom1, vatom2);
3273       case Iop_Xor32:
3274          return mkUifU32(mce, vatom1, vatom2);
3275       case Iop_Xor64:
3276          return mkUifU64(mce, vatom1, vatom2);
3277       case Iop_XorV128:
3278          return mkUifUV128(mce, vatom1, vatom2);
3279 
3280       default:
3281          ppIROp(op);
3282          VG_(tool_panic)("memcheck:expr2vbits_Binop");
3283    }
3284 }
3285 
3286 
3287 static
3288 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3289 {
3290    IRAtom* vatom = expr2vbits( mce, atom );
3291    tl_assert(isOriginalAtom(mce,atom));
3292    switch (op) {
3293 
3294       case Iop_Sqrt64Fx2:
3295          return unary64Fx2(mce, vatom);
3296 
3297       case Iop_Sqrt64F0x2:
3298          return unary64F0x2(mce, vatom);
3299 
3300       case Iop_Sqrt32Fx4:
3301       case Iop_RSqrt32Fx4:
3302       case Iop_Recip32Fx4:
3303       case Iop_I32UtoFx4:
3304       case Iop_I32StoFx4:
3305       case Iop_QFtoI32Ux4_RZ:
3306       case Iop_QFtoI32Sx4_RZ:
3307       case Iop_RoundF32x4_RM:
3308       case Iop_RoundF32x4_RP:
3309       case Iop_RoundF32x4_RN:
3310       case Iop_RoundF32x4_RZ:
3311       case Iop_Recip32x4:
3312       case Iop_Abs32Fx4:
3313       case Iop_Neg32Fx4:
3314       case Iop_Rsqrte32Fx4:
3315          return unary32Fx4(mce, vatom);
3316 
3317       case Iop_I32UtoFx2:
3318       case Iop_I32StoFx2:
3319       case Iop_Recip32Fx2:
3320       case Iop_Recip32x2:
3321       case Iop_Abs32Fx2:
3322       case Iop_Neg32Fx2:
3323       case Iop_Rsqrte32Fx2:
3324          return unary32Fx2(mce, vatom);
3325 
3326       case Iop_Sqrt32F0x4:
3327       case Iop_RSqrt32F0x4:
3328       case Iop_Recip32F0x4:
3329          return unary32F0x4(mce, vatom);
3330 
3331       case Iop_32UtoV128:
3332       case Iop_64UtoV128:
3333       case Iop_Dup8x16:
3334       case Iop_Dup16x8:
3335       case Iop_Dup32x4:
3336       case Iop_Reverse16_8x16:
3337       case Iop_Reverse32_8x16:
3338       case Iop_Reverse32_16x8:
3339       case Iop_Reverse64_8x16:
3340       case Iop_Reverse64_16x8:
3341       case Iop_Reverse64_32x4:
3342          return assignNew('V', mce, Ity_V128, unop(op, vatom));
3343 
3344       case Iop_F128HItoF64:  /* F128 -> F64 (high half) */
3345          return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3346       case Iop_F128LOtoF64:  /* F128 -> F64 (low half) */
3347          return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3348 
3349       case Iop_NegF128:
3350       case Iop_AbsF128:
3351          return mkPCastTo(mce, Ity_I128, vatom);
3352 
3353       case Iop_I32StoF128: /* signed I32 -> F128 */
3354       case Iop_I64StoF128: /* signed I64 -> F128 */
3355       case Iop_F32toF128:  /* F32 -> F128 */
3356       case Iop_F64toF128:  /* F64 -> F128 */
3357          return mkPCastTo(mce, Ity_I128, vatom);
3358 
3359       case Iop_F32toF64:
3360       case Iop_I32StoF64:
3361       case Iop_I32UtoF64:
3362       case Iop_NegF64:
3363       case Iop_AbsF64:
3364       case Iop_Est5FRSqrt:
3365       case Iop_RoundF64toF64_NEAREST:
3366       case Iop_RoundF64toF64_NegINF:
3367       case Iop_RoundF64toF64_PosINF:
3368       case Iop_RoundF64toF64_ZERO:
3369       case Iop_Clz64:
3370       case Iop_Ctz64:
3371          return mkPCastTo(mce, Ity_I64, vatom);
3372 
3373       case Iop_Clz32:
3374       case Iop_Ctz32:
3375       case Iop_TruncF64asF32:
3376       case Iop_NegF32:
3377       case Iop_AbsF32:
3378          return mkPCastTo(mce, Ity_I32, vatom);
3379 
3380       case Iop_1Uto64:
3381       case Iop_1Sto64:
3382       case Iop_8Uto64:
3383       case Iop_8Sto64:
3384       case Iop_16Uto64:
3385       case Iop_16Sto64:
3386       case Iop_32Sto64:
3387       case Iop_32Uto64:
3388       case Iop_V128to64:
3389       case Iop_V128HIto64:
3390       case Iop_128HIto64:
3391       case Iop_128to64:
3392       case Iop_Dup8x8:
3393       case Iop_Dup16x4:
3394       case Iop_Dup32x2:
3395       case Iop_Reverse16_8x8:
3396       case Iop_Reverse32_8x8:
3397       case Iop_Reverse32_16x4:
3398       case Iop_Reverse64_8x8:
3399       case Iop_Reverse64_16x4:
3400       case Iop_Reverse64_32x2:
3401          return assignNew('V', mce, Ity_I64, unop(op, vatom));
3402 
3403       case Iop_I16StoF32:
3404       case Iop_64to32:
3405       case Iop_64HIto32:
3406       case Iop_1Uto32:
3407       case Iop_1Sto32:
3408       case Iop_8Uto32:
3409       case Iop_16Uto32:
3410       case Iop_16Sto32:
3411       case Iop_8Sto32:
3412       case Iop_V128to32:
3413          return assignNew('V', mce, Ity_I32, unop(op, vatom));
3414 
3415       case Iop_8Sto16:
3416       case Iop_8Uto16:
3417       case Iop_32to16:
3418       case Iop_32HIto16:
3419       case Iop_64to16:
3420          return assignNew('V', mce, Ity_I16, unop(op, vatom));
3421 
3422       case Iop_1Uto8:
3423       case Iop_1Sto8:
3424       case Iop_16to8:
3425       case Iop_16HIto8:
3426       case Iop_32to8:
3427       case Iop_64to8:
3428          return assignNew('V', mce, Ity_I8, unop(op, vatom));
3429 
3430       case Iop_32to1:
3431          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3432 
3433       case Iop_64to1:
3434          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3435 
3436       case Iop_ReinterpF64asI64:
3437       case Iop_ReinterpI64asF64:
3438       case Iop_ReinterpI32asF32:
3439       case Iop_ReinterpF32asI32:
3440       case Iop_NotV128:
3441       case Iop_Not64:
3442       case Iop_Not32:
3443       case Iop_Not16:
3444       case Iop_Not8:
3445       case Iop_Not1:
3446          return vatom;
3447 
3448       case Iop_CmpNEZ8x8:
3449       case Iop_Cnt8x8:
3450       case Iop_Clz8Sx8:
3451       case Iop_Cls8Sx8:
3452       case Iop_Abs8x8:
3453          return mkPCast8x8(mce, vatom);
3454 
3455       case Iop_CmpNEZ8x16:
3456       case Iop_Cnt8x16:
3457       case Iop_Clz8Sx16:
3458       case Iop_Cls8Sx16:
3459       case Iop_Abs8x16:
3460          return mkPCast8x16(mce, vatom);
3461 
3462       case Iop_CmpNEZ16x4:
3463       case Iop_Clz16Sx4:
3464       case Iop_Cls16Sx4:
3465       case Iop_Abs16x4:
3466          return mkPCast16x4(mce, vatom);
3467 
3468       case Iop_CmpNEZ16x8:
3469       case Iop_Clz16Sx8:
3470       case Iop_Cls16Sx8:
3471       case Iop_Abs16x8:
3472          return mkPCast16x8(mce, vatom);
3473 
3474       case Iop_CmpNEZ32x2:
3475       case Iop_Clz32Sx2:
3476       case Iop_Cls32Sx2:
3477       case Iop_FtoI32Ux2_RZ:
3478       case Iop_FtoI32Sx2_RZ:
3479       case Iop_Abs32x2:
3480          return mkPCast32x2(mce, vatom);
3481 
3482       case Iop_CmpNEZ32x4:
3483       case Iop_Clz32Sx4:
3484       case Iop_Cls32Sx4:
3485       case Iop_FtoI32Ux4_RZ:
3486       case Iop_FtoI32Sx4_RZ:
3487       case Iop_Abs32x4:
3488          return mkPCast32x4(mce, vatom);
3489 
3490       case Iop_CmpwNEZ64:
3491          return mkPCastTo(mce, Ity_I64, vatom);
3492 
3493       case Iop_CmpNEZ64x2:
3494          return mkPCast64x2(mce, vatom);
3495 
3496       case Iop_NarrowUn16to8x8:
3497       case Iop_NarrowUn32to16x4:
3498       case Iop_NarrowUn64to32x2:
3499       case Iop_QNarrowUn16Sto8Sx8:
3500       case Iop_QNarrowUn16Sto8Ux8:
3501       case Iop_QNarrowUn16Uto8Ux8:
3502       case Iop_QNarrowUn32Sto16Sx4:
3503       case Iop_QNarrowUn32Sto16Ux4:
3504       case Iop_QNarrowUn32Uto16Ux4:
3505       case Iop_QNarrowUn64Sto32Sx2:
3506       case Iop_QNarrowUn64Sto32Ux2:
3507       case Iop_QNarrowUn64Uto32Ux2:
3508          return vectorNarrowUnV128(mce, op, vatom);
3509 
3510       case Iop_Widen8Sto16x8:
3511       case Iop_Widen8Uto16x8:
3512       case Iop_Widen16Sto32x4:
3513       case Iop_Widen16Uto32x4:
3514       case Iop_Widen32Sto64x2:
3515       case Iop_Widen32Uto64x2:
3516          return vectorWidenI64(mce, op, vatom);
3517 
3518       case Iop_PwAddL32Ux2:
3519       case Iop_PwAddL32Sx2:
3520          return mkPCastTo(mce, Ity_I64,
3521                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3522 
3523       case Iop_PwAddL16Ux4:
3524       case Iop_PwAddL16Sx4:
3525          return mkPCast32x2(mce,
3526                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3527 
3528       case Iop_PwAddL8Ux8:
3529       case Iop_PwAddL8Sx8:
3530          return mkPCast16x4(mce,
3531                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3532 
3533       case Iop_PwAddL32Ux4:
3534       case Iop_PwAddL32Sx4:
3535          return mkPCast64x2(mce,
3536                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3537 
3538       case Iop_PwAddL16Ux8:
3539       case Iop_PwAddL16Sx8:
3540          return mkPCast32x4(mce,
3541                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3542 
3543       case Iop_PwAddL8Ux16:
3544       case Iop_PwAddL8Sx16:
3545          return mkPCast16x8(mce,
3546                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
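      /* Scheme for the PwAddL cases above: PCast each source lane to
         all-0s or all-1s, let the real pairwise-add-long op combine
         adjacent lanes into double-width lanes, then PCast again at the
         destination lane width.  The net effect is that a destination
         lane is marked entirely undefined iff either of the two source
         lanes feeding it contains any undefined bit. */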
3547 
3548       case Iop_I64UtoF32:
3549       default:
3550          ppIROp(op);
3551          VG_(tool_panic)("memcheck:expr2vbits_Unop");
3552    }
3553 }
3554 
3555 
3556 /* Worker function; do not call directly. */
3557 static
3558 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3559                               IREndness end, IRType ty,
3560                               IRAtom* addr, UInt bias )
3561 {
3562    void*    helper;
3563    Char*    hname;
3564    IRDirty* di;
3565    IRTemp   datavbits;
3566    IRAtom*  addrAct;
3567 
3568    tl_assert(isOriginalAtom(mce,addr));
3569    tl_assert(end == Iend_LE || end == Iend_BE);
3570 
3571    /* First, emit a definedness test for the address.  This also sets
3572       the address (shadow) to 'defined' following the test. */
3573    complainIfUndefined( mce, addr );
3574 
3575    /* Now cook up a call to the relevant helper function, to read the
3576       data V bits from shadow memory. */
3577    ty = shadowTypeV(ty);
3578 
3579    if (end == Iend_LE) {
3580       switch (ty) {
3581          case Ity_I64: helper = &MC_(helperc_LOADV64le);
3582                        hname = "MC_(helperc_LOADV64le)";
3583                        break;
3584          case Ity_I32: helper = &MC_(helperc_LOADV32le);
3585                        hname = "MC_(helperc_LOADV32le)";
3586                        break;
3587          case Ity_I16: helper = &MC_(helperc_LOADV16le);
3588                        hname = "MC_(helperc_LOADV16le)";
3589                        break;
3590          case Ity_I8:  helper = &MC_(helperc_LOADV8);
3591                        hname = "MC_(helperc_LOADV8)";
3592                        break;
3593          default:      ppIRType(ty);
3594                        VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3595       }
3596    } else {
3597       switch (ty) {
3598          case Ity_I64: helper = &MC_(helperc_LOADV64be);
3599                        hname = "MC_(helperc_LOADV64be)";
3600                        break;
3601          case Ity_I32: helper = &MC_(helperc_LOADV32be);
3602                        hname = "MC_(helperc_LOADV32be)";
3603                        break;
3604          case Ity_I16: helper = &MC_(helperc_LOADV16be);
3605                        hname = "MC_(helperc_LOADV16be)";
3606                        break;
3607          case Ity_I8:  helper = &MC_(helperc_LOADV8);
3608                        hname = "MC_(helperc_LOADV8)";
3609                        break;
3610          default:      ppIRType(ty);
3611                        VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3612       }
3613    }
3614 
3615    /* Generate the actual address into addrAct. */
3616    if (bias == 0) {
3617       addrAct = addr;
3618    } else {
3619       IROp    mkAdd;
3620       IRAtom* eBias;
3621       IRType  tyAddr  = mce->hWordTy;
3622       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3623       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3624       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3625       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3626    }
3627 
3628    /* We need to have a place to park the V bits we're just about to
3629       read. */
3630    datavbits = newTemp(mce, ty, VSh);
3631    di = unsafeIRDirty_1_N( datavbits,
3632                            1/*regparms*/,
3633                            hname, VG_(fnptr_to_fnentry)( helper ),
3634                            mkIRExprVec_1( addrAct ));
3635    setHelperAnns( mce, di );
3636    stmt( 'V', mce, IRStmt_Dirty(di) );
3637 
3638    return mkexpr(datavbits);
3639 }
3640 
3641 
3642 static
3643 IRAtom* expr2vbits_Load ( MCEnv* mce,
3644                           IREndness end, IRType ty,
3645                           IRAtom* addr, UInt bias )
3646 {
3647    IRAtom *v64hi, *v64lo;
3648    tl_assert(end == Iend_LE || end == Iend_BE);
3649    switch (shadowTypeV(ty)) {
3650       case Ity_I8:
3651       case Ity_I16:
3652       case Ity_I32:
3653       case Ity_I64:
3654          return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3655       case Ity_V128:
3656          if (end == Iend_LE) {
3657             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3658             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3659          } else {
3660             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3661             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3662          }
3663          return assignNew( 'V', mce,
3664                            Ity_V128,
3665                            binop(Iop_64HLtoV128, v64hi, v64lo));
3666       default:
3667          VG_(tool_panic)("expr2vbits_Load");
3668    }
3669 }
3670 
3671 
3672 static
3673 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3674                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3675 {
3676    IRAtom *vbitsC, *vbits0, *vbitsX;
3677    IRType ty;
3678    /* Given Mux0X(cond,expr0,exprX), generate
3679          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3680       That is, steer the V bits like the originals, but trash the
3681       result if the steering value is undefined.  This gives
3682       lazy propagation. */
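   /* Concretely: if cond is undefined, PCast(cond#) is all ones, so the
      UifU forces every bit of the result to undefined; if cond is
      defined, PCast(cond#) is all zeroes and the result's V bits are
      simply those of whichever arm the Mux0X selects. */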
3683    tl_assert(isOriginalAtom(mce, cond));
3684    tl_assert(isOriginalAtom(mce, expr0));
3685    tl_assert(isOriginalAtom(mce, exprX));
3686 
3687    vbitsC = expr2vbits(mce, cond);
3688    vbits0 = expr2vbits(mce, expr0);
3689    vbitsX = expr2vbits(mce, exprX);
3690    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
3691 
3692    return
3693       mkUifU(mce, ty, assignNew('V', mce, ty,
3694                                      IRExpr_Mux0X(cond, vbits0, vbitsX)),
3695                       mkPCastTo(mce, ty, vbitsC) );
3696 }
3697 
3698 /* --------- This is the main expression-handling function. --------- */
3699 
3700 static
3701 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3702 {
3703    switch (e->tag) {
3704 
3705       case Iex_Get:
3706          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3707 
3708       case Iex_GetI:
3709          return shadow_GETI( mce, e->Iex.GetI.descr,
3710                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
3711 
3712       case Iex_RdTmp:
3713          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
3714 
3715       case Iex_Const:
3716          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
3717 
3718       case Iex_Qop:
3719          return expr2vbits_Qop(
3720                    mce,
3721                    e->Iex.Qop.op,
3722                    e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3723                    e->Iex.Qop.arg3, e->Iex.Qop.arg4
3724                 );
3725 
3726       case Iex_Triop:
3727          return expr2vbits_Triop(
3728                    mce,
3729                    e->Iex.Triop.op,
3730                    e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3731                 );
3732 
3733       case Iex_Binop:
3734          return expr2vbits_Binop(
3735                    mce,
3736                    e->Iex.Binop.op,
3737                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
3738                 );
3739 
3740       case Iex_Unop:
3741          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3742 
3743       case Iex_Load:
3744          return expr2vbits_Load( mce, e->Iex.Load.end,
3745                                       e->Iex.Load.ty,
3746                                       e->Iex.Load.addr, 0/*addr bias*/ );
3747 
3748       case Iex_CCall:
3749          return mkLazyN( mce, e->Iex.CCall.args,
3750                               e->Iex.CCall.retty,
3751                               e->Iex.CCall.cee );
3752 
3753       case Iex_Mux0X:
3754          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3755                                        e->Iex.Mux0X.exprX);
3756 
3757       default:
3758          VG_(printf)("\n");
3759          ppIRExpr(e);
3760          VG_(printf)("\n");
3761          VG_(tool_panic)("memcheck: expr2vbits");
3762    }
3763 }
3764 
3765 /*------------------------------------------------------------*/
3766 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
3767 /*------------------------------------------------------------*/
3768 
3769 /* Widen a value to the host word size. */
3770 
3771 static
3772 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
3773 {
3774    IRType ty, tyH;
3775 
3776    /* vatom is vbits-value and as such can only have a shadow type. */
3777    tl_assert(isShadowAtom(mce,vatom));
3778 
3779    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
3780    tyH = mce->hWordTy;
3781 
3782    if (tyH == Ity_I32) {
3783       switch (ty) {
3784          case Ity_I32:
3785             return vatom;
3786          case Ity_I16:
3787             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3788          case Ity_I8:
3789             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3790          default:
3791             goto unhandled;
3792       }
3793    } else
3794    if (tyH == Ity_I64) {
3795       switch (ty) {
3796          case Ity_I32:
3797             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3798          case Ity_I16:
3799             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3800                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3801          case Ity_I8:
3802             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3803                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3804          default:
3805             goto unhandled;
3806       }
3807    } else {
3808       goto unhandled;
3809    }
3810   unhandled:
3811    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3812    VG_(tool_panic)("zwidenToHostWord");
3813 }
3814 
3815 
3816 /* Generate a shadow store.  addr is always the original address atom.
3817    You can pass in either originals or V-bits for the data atom, but
3818    obviously not both.  guard :: Ity_I1 controls whether the store
3819    really happens; NULL means it unconditionally does.  Note that
3820    guard itself is not checked for definedness; the caller of this
3821    function must do that if necessary. */
3822 
3823 static
3824 void do_shadow_Store ( MCEnv* mce,
3825                        IREndness end,
3826                        IRAtom* addr, UInt bias,
3827                        IRAtom* data, IRAtom* vdata,
3828                        IRAtom* guard )
3829 {
3830    IROp     mkAdd;
3831    IRType   ty, tyAddr;
3832    void*    helper = NULL;
3833    Char*    hname = NULL;
3834    IRConst* c;
3835 
3836    tyAddr = mce->hWordTy;
3837    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3838    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3839    tl_assert( end == Iend_LE || end == Iend_BE );
3840 
3841    if (data) {
3842       tl_assert(!vdata);
3843       tl_assert(isOriginalAtom(mce, data));
3844       tl_assert(bias == 0);
3845       vdata = expr2vbits( mce, data );
3846    } else {
3847       tl_assert(vdata);
3848    }
3849 
3850    tl_assert(isOriginalAtom(mce,addr));
3851    tl_assert(isShadowAtom(mce,vdata));
3852 
3853    if (guard) {
3854       tl_assert(isOriginalAtom(mce, guard));
3855       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3856    }
3857 
3858    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
3859 
3860    // If we're not doing undefined value checking, pretend that this value
3861    // is "all valid".  That lets Vex's optimiser remove some of the V bit
3862    // shadow computation ops that precede it.
3863    if (MC_(clo_mc_level) == 1) {
3864       switch (ty) {
3865          case Ity_V128: // V128 weirdness
3866                         c = IRConst_V128(V_BITS16_DEFINED); break;
3867          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
3868          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
3869          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
3870          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
3871          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3872       }
3873       vdata = IRExpr_Const( c );
3874    }
3875 
3876    /* First, emit a definedness test for the address.  This also sets
3877       the address (shadow) to 'defined' following the test. */
3878    complainIfUndefined( mce, addr );
3879 
3880    /* Now decide which helper function to call to write the data V
3881       bits into shadow memory. */
3882    if (end == Iend_LE) {
3883       switch (ty) {
3884          case Ity_V128: /* we'll use the helper twice */
3885          case Ity_I64: helper = &MC_(helperc_STOREV64le);
3886                        hname = "MC_(helperc_STOREV64le)";
3887                        break;
3888          case Ity_I32: helper = &MC_(helperc_STOREV32le);
3889                        hname = "MC_(helperc_STOREV32le)";
3890                        break;
3891          case Ity_I16: helper = &MC_(helperc_STOREV16le);
3892                        hname = "MC_(helperc_STOREV16le)";
3893                        break;
3894          case Ity_I8:  helper = &MC_(helperc_STOREV8);
3895                        hname = "MC_(helperc_STOREV8)";
3896                        break;
3897          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3898       }
3899    } else {
3900       switch (ty) {
3901          case Ity_V128: /* we'll use the helper twice */
3902          case Ity_I64: helper = &MC_(helperc_STOREV64be);
3903                        hname = "MC_(helperc_STOREV64be)";
3904                        break;
3905          case Ity_I32: helper = &MC_(helperc_STOREV32be);
3906                        hname = "MC_(helperc_STOREV32be)";
3907                        break;
3908          case Ity_I16: helper = &MC_(helperc_STOREV16be);
3909                        hname = "MC_(helperc_STOREV16be)";
3910                        break;
3911          case Ity_I8:  helper = &MC_(helperc_STOREV8);
3912                        hname = "MC_(helperc_STOREV8)";
3913                        break;
3914          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3915       }
3916    }
3917 
3918    if (ty == Ity_V128) {
3919 
3920       /* V128-bit case */
3921       /* See comment in next clause re 64-bit regparms */
3922       /* also, need to be careful about endianness */
3923 
3924       Int     offLo64, offHi64;
3925       IRDirty *diLo64, *diHi64;
3926       IRAtom  *addrLo64, *addrHi64;
3927       IRAtom  *vdataLo64, *vdataHi64;
3928       IRAtom  *eBiasLo64, *eBiasHi64;
3929 
3930       if (end == Iend_LE) {
3931          offLo64 = 0;
3932          offHi64 = 8;
3933       } else {
3934          offLo64 = 8;
3935          offHi64 = 0;
3936       }
3937 
3938       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
3939       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3940       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
3941       diLo64    = unsafeIRDirty_0_N(
3942                      1/*regparms*/,
3943                      hname, VG_(fnptr_to_fnentry)( helper ),
3944                      mkIRExprVec_2( addrLo64, vdataLo64 )
3945                   );
3946       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
3947       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3948       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
3949       diHi64    = unsafeIRDirty_0_N(
3950                      1/*regparms*/,
3951                      hname, VG_(fnptr_to_fnentry)( helper ),
3952                      mkIRExprVec_2( addrHi64, vdataHi64 )
3953                   );
3954       if (guard) diLo64->guard = guard;
3955       if (guard) diHi64->guard = guard;
3956       setHelperAnns( mce, diLo64 );
3957       setHelperAnns( mce, diHi64 );
3958       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3959       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
3960 
3961    } else {
3962 
3963       IRDirty *di;
3964       IRAtom  *addrAct;
3965 
3966       /* 8/16/32/64-bit cases */
3967       /* Generate the actual address into addrAct. */
3968       if (bias == 0) {
3969          addrAct = addr;
3970       } else {
3971          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3972          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
3973       }
3974 
3975       if (ty == Ity_I64) {
3976          /* We can't do this with regparm 2 on 32-bit platforms, since
3977             the back ends aren't clever enough to handle 64-bit
3978             regparm args.  Therefore be different. */
3979          di = unsafeIRDirty_0_N(
3980                  1/*regparms*/,
3981                  hname, VG_(fnptr_to_fnentry)( helper ),
3982                  mkIRExprVec_2( addrAct, vdata )
3983               );
3984       } else {
3985          di = unsafeIRDirty_0_N(
3986                  2/*regparms*/,
3987                  hname, VG_(fnptr_to_fnentry)( helper ),
3988                  mkIRExprVec_2( addrAct,
3989                                 zwidenToHostWord( mce, vdata ))
3990               );
3991       }
3992       if (guard) di->guard = guard;
3993       setHelperAnns( mce, di );
3994       stmt( 'V', mce, IRStmt_Dirty(di) );
3995    }
3996 
3997 }
3998 
3999 
4000 /* Do lazy pessimistic propagation through a dirty helper call, by
4001    looking at the annotations on it.  This is the most complex part of
4002    Memcheck. */
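/* The overall scheme, as implemented in do_shadow_Dirty below: PCast the
   V bits of every input to the call (the unmasked args, any guest state
   it reads, and any memory it reads) down to a single I32 summary,
   UifU-ing them together.  Then PCast that summary back out to every
   output: the destination temporary, any guest state written, and any
   memory written.  So if any input is anywhere undefined, all outputs
   are reported as entirely undefined. */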
4003 
4004 static IRType szToITy ( Int n )
4005 {
4006    switch (n) {
4007       case 1: return Ity_I8;
4008       case 2: return Ity_I16;
4009       case 4: return Ity_I32;
4010       case 8: return Ity_I64;
4011       default: VG_(tool_panic)("szToITy(memcheck)");
4012    }
4013 }
4014 
4015 static
4016 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4017 {
4018    Int       i, n, toDo, gSz, gOff;
4019    IRAtom    *src, *here, *curr;
4020    IRType    tySrc, tyDst;
4021    IRTemp    dst;
4022    IREndness end;
4023 
4024    /* What's the native endianness?  We need to know this. */
4025 #  if defined(VG_BIGENDIAN)
4026    end = Iend_BE;
4027 #  elif defined(VG_LITTLEENDIAN)
4028    end = Iend_LE;
4029 #  else
4030 #    error "Unknown endianness"
4031 #  endif
4032 
4033    /* First check the guard. */
4034    complainIfUndefined(mce, d->guard);
4035 
4036    /* Now round up all inputs and PCast over them. */
4037    curr = definedOfType(Ity_I32);
4038 
4039    /* Inputs: unmasked args */
4040    for (i = 0; d->args[i]; i++) {
4041       if (d->cee->mcx_mask & (1<<i)) {
4042          /* ignore this arg */
4043       } else {
4044          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4045          curr = mkUifU32(mce, here, curr);
4046       }
4047    }
4048 
4049    /* Inputs: guest state that we read. */
4050    for (i = 0; i < d->nFxState; i++) {
4051       tl_assert(d->fxState[i].fx != Ifx_None);
4052       if (d->fxState[i].fx == Ifx_Write)
4053          continue;
4054 
4055       /* Ignore any sections marked as 'always defined'. */
4056       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
4057          if (0)
4058          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4059                      d->fxState[i].offset, d->fxState[i].size );
4060          continue;
4061       }
4062 
4063       /* This state element is read or modified.  So we need to
4064          consider it.  If larger than 8 bytes, deal with it in 8-byte
4065          chunks. */
4066       gSz  = d->fxState[i].size;
4067       gOff = d->fxState[i].offset;
4068       tl_assert(gSz > 0);
4069       while (True) {
4070          if (gSz == 0) break;
4071          n = gSz <= 8 ? gSz : 8;
4072          /* update 'curr' with UifU of the state slice
4073             gOff .. gOff+n-1 */
4074          tySrc = szToITy( n );
4075          src   = assignNew( 'V', mce, tySrc,
4076                                  shadow_GET(mce, gOff, tySrc ) );
4077          here = mkPCastTo( mce, Ity_I32, src );
4078          curr = mkUifU32(mce, here, curr);
4079          gSz -= n;
4080          gOff += n;
4081       }
4082 
4083    }
4084 
4085    /* Inputs: memory.  First set up some info needed regardless of
4086       whether we're doing reads or writes. */
4087 
4088    if (d->mFx != Ifx_None) {
4089       /* Because we may do multiple shadow loads/stores from the same
4090          base address, it's best to do a single test of its
4091          definedness right now.  Post-instrumentation optimisation
4092          should remove all but this test. */
4093       IRType tyAddr;
4094       tl_assert(d->mAddr);
4095       complainIfUndefined(mce, d->mAddr);
4096 
4097       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4098       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4099       tl_assert(tyAddr == mce->hWordTy); /* not really right */
4100    }
4101 
4102    /* Deal with memory inputs (reads or modifies) */
4103    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4104       toDo   = d->mSize;
4105       /* chew off 32-bit chunks.  We don't care about the endianness
4106          since it's all going to be condensed down to a single bit,
4107          but nevertheless choose an endianness which is hopefully
4108          native to the platform. */
4109       while (toDo >= 4) {
4110          here = mkPCastTo(
4111                    mce, Ity_I32,
4112                    expr2vbits_Load ( mce, end, Ity_I32,
4113                                      d->mAddr, d->mSize - toDo )
4114                 );
4115          curr = mkUifU32(mce, here, curr);
4116          toDo -= 4;
4117       }
4118       /* chew off 16-bit chunks */
4119       while (toDo >= 2) {
4120          here = mkPCastTo(
4121                    mce, Ity_I32,
4122                    expr2vbits_Load ( mce, end, Ity_I16,
4123                                      d->mAddr, d->mSize - toDo )
4124                 );
4125          curr = mkUifU32(mce, here, curr);
4126          toDo -= 2;
4127       }
4128       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4129    }
4130 
4131    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
4132       all the inputs to the helper.  Now we need to re-distribute the
4133       results to all destinations. */
4134 
4135    /* Outputs: the destination temporary, if there is one. */
4136    if (d->tmp != IRTemp_INVALID) {
4137       dst   = findShadowTmpV(mce, d->tmp);
4138       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4139       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4140    }
4141 
4142    /* Outputs: guest state that we write or modify. */
4143    for (i = 0; i < d->nFxState; i++) {
4144       tl_assert(d->fxState[i].fx != Ifx_None);
4145       if (d->fxState[i].fx == Ifx_Read)
4146          continue;
4147       /* Ignore any sections marked as 'always defined'. */
4148       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4149          continue;
4150       /* This state element is written or modified.  So we need to
4151          consider it.  If larger than 8 bytes, deal with it in 8-byte
4152          chunks. */
4153       gSz  = d->fxState[i].size;
4154       gOff = d->fxState[i].offset;
4155       tl_assert(gSz > 0);
4156       while (True) {
4157          if (gSz == 0) break;
4158          n = gSz <= 8 ? gSz : 8;
4159          /* Write suitably-casted 'curr' to the state slice
4160             gOff .. gOff+n-1 */
4161          tyDst = szToITy( n );
4162          do_shadow_PUT( mce, gOff,
4163                              NULL, /* original atom */
4164                              mkPCastTo( mce, tyDst, curr ) );
4165          gSz -= n;
4166          gOff += n;
4167       }
4168    }
4169 
4170    /* Outputs: memory that we write or modify.  Same comments about
4171       endianness as above apply. */
4172    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4173       toDo   = d->mSize;
4174       /* chew off 32-bit chunks */
4175       while (toDo >= 4) {
4176          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4177                           NULL, /* original data */
4178                           mkPCastTo( mce, Ity_I32, curr ),
4179                           NULL/*guard*/ );
4180          toDo -= 4;
4181       }
4182       /* chew off 16-bit chunks */
4183       while (toDo >= 2) {
4184          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4185                           NULL, /* original data */
4186                           mkPCastTo( mce, Ity_I16, curr ),
4187                           NULL/*guard*/ );
4188          toDo -= 2;
4189       }
4190       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4191    }
4192 
4193 }
4194 
4195 
4196 /* We have an ABI hint telling us that [base .. base+len-1] is to
4197    become undefined ("writable").  Generate code to call a helper to
4198    notify the A/V bit machinery of this fact.
4199 
4200    We call
4201    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4202                                                     Addr nia );
4203 */
4204 static
4205 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4206 {
4207    IRDirty* di;
4208    /* Minor optimisation: if not doing origin tracking, ignore the
4209       supplied nia and pass zero instead.  This is on the basis that
4210       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4211       almost always generate a shorter instruction to put zero into a
4212       register than any other value. */
4213    if (MC_(clo_mc_level) < 3)
4214       nia = mkIRExpr_HWord(0);
4215 
4216    di = unsafeIRDirty_0_N(
4217            0/*regparms*/,
4218            "MC_(helperc_MAKE_STACK_UNINIT)",
4219            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4220            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4221         );
4222    stmt( 'V', mce, IRStmt_Dirty(di) );
4223 }
4224 
4225 
4226 /* ------ Dealing with IRCAS (big and complex) ------ */
4227 
4228 /* FWDS */
4229 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4230                              IRAtom* baseaddr, Int offset );
4231 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4232 static void    gen_store_b ( MCEnv* mce, Int szB,
4233                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
4234                              IRAtom* guard );
4235 
4236 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4237 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4238 
4239 
4240 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4241    IRExpr.Consts, else this asserts.  If they are both Consts, it
4242    doesn't do anything.  So that just leaves the RdTmp case.
4243 
4244    In which case: this assigns the shadow value SHADOW to the IR
4245    shadow temporary associated with ORIG.  That is, ORIG, being an
4246    original temporary, will have a shadow temporary associated with
4247    it.  However, in the case envisaged here, there will so far have
4248    been no IR emitted to actually write a shadow value into that
4249    temporary.  What this routine does is to (emit IR to) copy the
4250    value in SHADOW into said temporary, so that after this call,
4251    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4252    value in SHADOW.
4253 
4254    Point is to allow callers to compute "by hand" a shadow value for
4255    ORIG, and force it to be associated with ORIG.
4256 
4257       How do we know that the shadow associated with ORIG has not so far
4258    been assigned to?  Well, we don't per se know that, but supposing
4259    it had.  Then this routine would create a second assignment to it,
4260    and later the IR sanity checker would barf.  But that never
4261    happens.  QED.
4262 */
4263 static void bind_shadow_tmp_to_orig ( UChar how,
4264                                       MCEnv* mce,
4265                                       IRAtom* orig, IRAtom* shadow )
4266 {
4267    tl_assert(isOriginalAtom(mce, orig));
4268    tl_assert(isShadowAtom(mce, shadow));
4269    switch (orig->tag) {
4270       case Iex_Const:
4271          tl_assert(shadow->tag == Iex_Const);
4272          break;
4273       case Iex_RdTmp:
4274          tl_assert(shadow->tag == Iex_RdTmp);
4275          if (how == 'V') {
4276             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4277                    shadow);
4278          } else {
4279             tl_assert(how == 'B');
4280             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4281                    shadow);
4282          }
4283          break;
4284       default:
4285          tl_assert(0);
4286    }
4287 }
4288 
4289 
4290 static
4291 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4292 {
4293    /* Scheme is (both single- and double- cases):
4294 
4295       1. fetch data#,dataB (the proposed new value)
4296 
4297       2. fetch expd#,expdB (what we expect to see at the address)
4298 
4299       3. check definedness of address
4300 
4301       4. load old#,oldB from shadow memory; this also checks
4302          addressability of the address
4303 
4304       5. the CAS itself
4305 
4306       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4307 
4308       7. if "expected == old" (as computed by (6))
4309             store data#,dataB to shadow memory
4310 
4311       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4312       'data' but 7 stores 'data#'.  Hence it is possible for the
4313       shadow data to be incorrectly checked and/or updated:
4314 
4315       * 7 is at least gated correctly, since the 'expected == old'
4316         condition is derived from outputs of 5.  However, the shadow
4317         write could happen too late: imagine after 5 we are
4318         descheduled, a different thread runs, writes a different
4319         (shadow) value at the address, and then we resume, hence
4320         overwriting the shadow value written by the other thread.
4321 
4322       Because the original memory access is atomic, there's no way to
4323       make both the original and shadow accesses into a single atomic
4324       thing, hence this is unavoidable.
4325 
4326       At least as Valgrind stands, I don't think it's a problem, since
4327       we're single threaded *and* we guarantee that there are no
4328       context switches during the execution of any specific superblock
4329       -- context switches can only happen at superblock boundaries.
4330 
4331       If Valgrind ever becomes MT in the future, then it might be more
4332       of a problem.  A possible kludge would be to artificially
4333       associate with the location, a lock, which we must acquire and
4334       release around the transaction as a whole.  Hmm, that probably
4335       wouldn't work properly since it only guards us against other
4336       threads doing CASs on the same location, not against other
4337       threads doing normal reads and writes.
4338 
4339       ------------------------------------------------------------
4340 
4341       COMMENT_ON_CasCmpEQ:
4342 
4343       Note two things.  Firstly, in the sequence above, we compute
4344       "expected == old", but we don't check definedness of it.  Why
4345       not?  Also, the x86 and amd64 front ends use
4346       Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4347       determination (expected == old ?) for themselves, and we also
4348       don't check definedness for those primops; we just say that the
4349       result is defined.  Why?  Details follow.
4350 
4351       x86/amd64 contains various forms of locked insns:
4352       * lock prefix before all basic arithmetic insns;
4353         eg lock xorl %reg1,(%reg2)
4354       * atomic exchange reg-mem
4355       * compare-and-swaps
4356 
4357       Rather than attempt to represent them all, which would be a
4358       royal PITA, I used a result from Maurice Herlihy
4359       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4360       demonstrates that compare-and-swap is a primitive more general
4361       than the other two, and so can be used to represent all of them.
4362       So the translation scheme for (eg) lock incl (%reg) is as
4363       follows:
4364 
4365         again:
4366          old = * %reg
4367          new = old + 1
4368          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4369 
4370       The "atomically" is the CAS bit.  The scheme is always the same:
4371       get old value from memory, compute new value, atomically stuff
4372       new value back in memory iff the old value has not changed (iow,
4373       no other thread modified it in the meantime).  If it has changed
4374       then we've been out-raced and we have to start over.
4375 
4376       Now that's all very neat, but it has the bad side effect of
4377       introducing an explicit equality test into the translation.
4378       Consider the behaviour of said code on a memory location which
4379       is uninitialised.  We will wind up doing a comparison on
4380       uninitialised data, and mc duly complains.
4381 
4382       What's difficult about this is, the common case is that the
4383       location is uncontended, and so we're usually comparing the same
4384       value (* %reg) with itself.  So we shouldn't complain even if it
4385       is undefined.  But mc doesn't know that.
4386 
4387       My solution is to mark the == in the IR specially, so as to tell
4388       mc that it almost certainly compares a value with itself, and we
4389       should just regard the result as always defined.  Rather than
4390       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4391       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4392 
4393       So there's always the question of, can this give a false
4394       negative?  eg, imagine that initially, * %reg is defined; and we
4395       read that; but then in the gap between the read and the CAS, a
4396       different thread writes an undefined (and different) value at
4397       the location.  Then the CAS in this thread will fail and we will
4398       go back to "again:", but without knowing that the trip back
4399       there was based on an undefined comparison.  No matter; at least
4400       the other thread won the race and the location is correctly
4401       marked as undefined.  What if it wrote an uninitialised version
4402       of the same value that was there originally, though?
4403 
4404       etc etc.  Seems like there's a small corner case in which we
4405       might lose the fact that something's defined -- we're out-raced
4406       in between the "old = * reg" and the "atomically {", _and_ the
4407       other thread is writing in an undefined version of what's
4408       already there.  Well, that seems pretty unlikely.
4409 
4410       ---
4411 
4412       If we ever need to reinstate it .. code which generates a
4413       definedness test for "expected == old" was removed at r10432 of
4414       this file.
4415    */
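   /* Illustrative sketch only (not part of the instrumentation; temp
      names are invented).  For a 32-bit single CAS

         t_old = CASle(addr :: expd -> data)

      the code below emits, schematically:

         v_data = <V bits of data>                      -- step 1
         v_expd = <V bits of expd>                      -- step 2
         v_old  = <V-bit load from shadow of addr>      -- steps 3,4
         t_old  = CASle(addr :: expd -> data)           -- step 5
         eq     = CasCmpEQ32(expd, t_old)               -- step 6
         if (eq) store v_data to shadow of addr         -- step 7

      plus the corresponding B (origin) loads/stores when
      MC_(clo_mc_level) >= 3. */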
4416    if (cas->oldHi == IRTemp_INVALID) {
4417       do_shadow_CAS_single( mce, cas );
4418    } else {
4419       do_shadow_CAS_double( mce, cas );
4420    }
4421 }
4422 
4423 
4424 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4425 {
4426    IRAtom *vdataLo = NULL, *bdataLo = NULL;
4427    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4428    IRAtom *voldLo  = NULL, *boldLo  = NULL;
4429    IRAtom *expd_eq_old = NULL;
4430    IROp   opCasCmpEQ;
4431    Int    elemSzB;
4432    IRType elemTy;
4433    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4434 
4435    /* single CAS */
4436    tl_assert(cas->oldHi == IRTemp_INVALID);
4437    tl_assert(cas->expdHi == NULL);
4438    tl_assert(cas->dataHi == NULL);
4439 
4440    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4441    switch (elemTy) {
4442       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4443       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4444       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4445       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4446       default: tl_assert(0); /* IR defn disallows any other types */
4447    }
4448 
4449    /* 1. fetch data# (the proposed new value) */
4450    tl_assert(isOriginalAtom(mce, cas->dataLo));
4451    vdataLo
4452       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4453    tl_assert(isShadowAtom(mce, vdataLo));
4454    if (otrak) {
4455       bdataLo
4456          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4457       tl_assert(isShadowAtom(mce, bdataLo));
4458    }
4459 
4460    /* 2. fetch expected# (what we expect to see at the address) */
4461    tl_assert(isOriginalAtom(mce, cas->expdLo));
4462    vexpdLo
4463       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4464    tl_assert(isShadowAtom(mce, vexpdLo));
4465    if (otrak) {
4466       bexpdLo
4467          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4468       tl_assert(isShadowAtom(mce, bexpdLo));
4469    }
4470 
4471    /* 3. check definedness of address */
4472    /* 4. fetch old# from shadow memory; this also checks
4473          addressability of the address */
4474    voldLo
4475       = assignNew(
4476            'V', mce, elemTy,
4477            expr2vbits_Load(
4478               mce,
4479               cas->end, elemTy, cas->addr, 0/*Addr bias*/
4480         ));
4481    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4482    if (otrak) {
4483       boldLo
4484          = assignNew('B', mce, Ity_I32,
4485                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4486       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4487    }
4488 
4489    /* 5. the CAS itself */
4490    stmt( 'C', mce, IRStmt_CAS(cas) );
4491 
4492    /* 6. compute "expected == old" */
4493    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
4494    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4495       tree, but it's not copied from the input block. */
4496    expd_eq_old
4497       = assignNew('C', mce, Ity_I1,
4498                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4499 
4500    /* 7. if "expected == old"
4501             store data# to shadow memory */
4502    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4503                     NULL/*data*/, vdataLo/*vdata*/,
4504                     expd_eq_old/*guard for store*/ );
4505    if (otrak) {
4506       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4507                    bdataLo/*bdata*/,
4508                    expd_eq_old/*guard for store*/ );
4509    }
4510 }
4511 
4512 
4513 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4514 {
4515    IRAtom *vdataHi = NULL, *bdataHi = NULL;
4516    IRAtom *vdataLo = NULL, *bdataLo = NULL;
4517    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4518    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4519    IRAtom *voldHi  = NULL, *boldHi  = NULL;
4520    IRAtom *voldLo  = NULL, *boldLo  = NULL;
4521    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4522    IRAtom *expd_eq_old = NULL, *zero = NULL;
4523    IROp   opCasCmpEQ, opOr, opXor;
4524    Int    elemSzB, memOffsLo, memOffsHi;
4525    IRType elemTy;
4526    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4527 
4528    /* double CAS */
4529    tl_assert(cas->oldHi != IRTemp_INVALID);
4530    tl_assert(cas->expdHi != NULL);
4531    tl_assert(cas->dataHi != NULL);
4532 
4533    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4534    switch (elemTy) {
4535       case Ity_I8:
4536          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4537          elemSzB = 1; zero = mkU8(0);
4538          break;
4539       case Ity_I16:
4540          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4541          elemSzB = 2; zero = mkU16(0);
4542          break;
4543       case Ity_I32:
4544          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4545          elemSzB = 4; zero = mkU32(0);
4546          break;
4547       case Ity_I64:
4548          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4549          elemSzB = 8; zero = mkU64(0);
4550          break;
4551       default:
4552          tl_assert(0); /* IR defn disallows any other types */
4553    }
4554 
4555    /* 1. fetch data# (the proposed new value) */
4556    tl_assert(isOriginalAtom(mce, cas->dataHi));
4557    tl_assert(isOriginalAtom(mce, cas->dataLo));
4558    vdataHi
4559       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4560    vdataLo
4561       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4562    tl_assert(isShadowAtom(mce, vdataHi));
4563    tl_assert(isShadowAtom(mce, vdataLo));
4564    if (otrak) {
4565       bdataHi
4566          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4567       bdataLo
4568          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4569       tl_assert(isShadowAtom(mce, bdataHi));
4570       tl_assert(isShadowAtom(mce, bdataLo));
4571    }
4572 
4573    /* 2. fetch expected# (what we expect to see at the address) */
4574    tl_assert(isOriginalAtom(mce, cas->expdHi));
4575    tl_assert(isOriginalAtom(mce, cas->expdLo));
4576    vexpdHi
4577       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4578    vexpdLo
4579       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4580    tl_assert(isShadowAtom(mce, vexpdHi));
4581    tl_assert(isShadowAtom(mce, vexpdLo));
4582    if (otrak) {
4583       bexpdHi
4584          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
4585       bexpdLo
4586          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4587       tl_assert(isShadowAtom(mce, bexpdHi));
4588       tl_assert(isShadowAtom(mce, bexpdLo));
4589    }
4590 
4591    /* 3. check definedness of address */
4592    /* 4. fetch old# from shadow memory; this also checks
4593          addressability of the address */
4594    if (cas->end == Iend_LE) {
4595       memOffsLo = 0;
4596       memOffsHi = elemSzB;
4597    } else {
4598       tl_assert(cas->end == Iend_BE);
4599       memOffsLo = elemSzB;
4600       memOffsHi = 0;
4601    }
4602    voldHi
4603       = assignNew(
4604            'V', mce, elemTy,
4605            expr2vbits_Load(
4606               mce,
4607               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
4608         ));
4609    voldLo
4610       = assignNew(
4611            'V', mce, elemTy,
4612            expr2vbits_Load(
4613               mce,
4614               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
4615         ));
4616    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
4617    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4618    if (otrak) {
4619       boldHi
4620          = assignNew('B', mce, Ity_I32,
4621                      gen_load_b(mce, elemSzB, cas->addr,
4622                                 memOffsHi/*addr bias*/));
4623       boldLo
4624          = assignNew('B', mce, Ity_I32,
4625                      gen_load_b(mce, elemSzB, cas->addr,
4626                                 memOffsLo/*addr bias*/));
4627       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
4628       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4629    }
4630 
4631    /* 5. the CAS itself */
4632    stmt( 'C', mce, IRStmt_CAS(cas) );
4633 
4634    /* 6. compute "expected == old" */
4635    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
4636    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4637       tree, but it's not copied from the input block. */
4638    /*
4639       xHi = oldHi ^ expdHi;
4640       xLo = oldLo ^ expdLo;
4641       xHL = xHi | xLo;
4642       expd_eq_old = xHL == 0;
4643    */
4644    xHi = assignNew('C', mce, elemTy,
4645                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
4646    xLo = assignNew('C', mce, elemTy,
4647                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
4648    xHL = assignNew('C', mce, elemTy,
4649                    binop(opOr, xHi, xLo));
4650    expd_eq_old
4651       = assignNew('C', mce, Ity_I1,
4652                   binop(opCasCmpEQ, xHL, zero));
4653 
4654    /* 7. if "expected == old"
4655             store data# to shadow memory */
4656    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
4657                     NULL/*data*/, vdataHi/*vdata*/,
4658                     expd_eq_old/*guard for store*/ );
4659    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
4660                     NULL/*data*/, vdataLo/*vdata*/,
4661                     expd_eq_old/*guard for store*/ );
4662    if (otrak) {
4663       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
4664                    bdataHi/*bdata*/,
4665                    expd_eq_old/*guard for store*/ );
4666       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
4667                    bdataLo/*bdata*/,
4668                    expd_eq_old/*guard for store*/ );
4669    }
4670 }
4671 
4672 
4673 /* ------ Dealing with LL/SC (not difficult) ------ */
4674 
4675 static void do_shadow_LLSC ( MCEnv*    mce,
4676                              IREndness stEnd,
4677                              IRTemp    stResult,
4678                              IRExpr*   stAddr,
4679                              IRExpr*   stStoredata )
4680 {
4681    /* In short: treat a load-linked like a normal load followed by an
4682       assignment of the loaded (shadow) data to the result temporary.
4683       Treat a store-conditional like a normal store, and mark the
4684       result temporary as defined. */
4685    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
4686    IRTemp resTmp = findShadowTmpV(mce, stResult);
4687 
4688    tl_assert(isIRAtom(stAddr));
4689    if (stStoredata)
4690       tl_assert(isIRAtom(stStoredata));
4691 
4692    if (stStoredata == NULL) {
4693       /* Load Linked */
4694       /* Just treat this as a normal load, followed by an assignment of
4695          the value to .result. */
4696       /* Stay sane */
4697       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4698                 || resTy == Ity_I16 || resTy == Ity_I8);
4699       assign( 'V', mce, resTmp,
4700                    expr2vbits_Load(
4701                       mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4702    } else {
4703       /* Store Conditional */
4704       /* Stay sane */
4705       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4706                                    stStoredata);
4707       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4708                 || dataTy == Ity_I16 || dataTy == Ity_I8);
4709       do_shadow_Store( mce, stEnd,
4710                             stAddr, 0/* addr bias */,
4711                             stStoredata,
4712                             NULL /* shadow data */,
4713                             NULL/*guard*/ );
4714       /* This is a store conditional, so it writes to .result a value
4715          indicating whether or not the store succeeded.  Just claim
4716          this value is always defined.  In the PowerPC interpretation
4717          of store-conditional, definedness of the success indication
4718          depends on whether the address of the store matches the
4719          reservation address.  But we can't tell that here (and
4720          anyway, we're not being PowerPC-specific).  At least we are
4721          guaranteed that the definedness of the store address, and its
4722          addressability, will be checked as per normal.  So it seems
4723          pretty safe to just say that the success indication is always
4724          defined.
4725 
4726          In schemeS, for origin tracking, we must correspondingly set
4727          a no-origin value for the origin shadow of .result.
4728       */
4729       tl_assert(resTy == Ity_I1);
4730       assign( 'V', mce, resTmp, definedOfType(resTy) );
4731    }
4732 }
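
/* Illustrative sketch only (schematic, temp names invented): for a
   guest load-linked / store-conditional pair such as

      t_ll = LD-Linked(addr)
      t_sc = ( ST-Cond(addr) = data )

   the instrumentation above amounts to:

      v_t_ll = <V-bit load from shadow of addr>   -- LL is a plain load
      <shadow store of V bits of data to addr>    -- SC is a plain store
      v_t_sc = all-defined :: Ity_I1              -- success flag claimed
                                                  -- defined
*/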
4733 
4734 
4735 /*------------------------------------------------------------*/
4736 /*--- Memcheck main                                        ---*/
4737 /*------------------------------------------------------------*/
4738 
4739 static void schemeS ( MCEnv* mce, IRStmt* st );
4740 
4741 static Bool isBogusAtom ( IRAtom* at )
4742 {
4743    ULong n = 0;
4744    IRConst* con;
4745    tl_assert(isIRAtom(at));
4746    if (at->tag == Iex_RdTmp)
4747       return False;
4748    tl_assert(at->tag == Iex_Const);
4749    con = at->Iex.Const.con;
4750    switch (con->tag) {
4751       case Ico_U1:   return False;
4752       case Ico_U8:   n = (ULong)con->Ico.U8; break;
4753       case Ico_U16:  n = (ULong)con->Ico.U16; break;
4754       case Ico_U32:  n = (ULong)con->Ico.U32; break;
4755       case Ico_U64:  n = (ULong)con->Ico.U64; break;
4756       case Ico_F64:  return False;
4757       case Ico_F32i: return False;
4758       case Ico_F64i: return False;
4759       case Ico_V128: return False;
4760       default: ppIRExpr(at); tl_assert(0);
4761    }
4762    /* VG_(printf)("%llx\n", n); */
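   /* Plausibly, the constants below are the magic values used by
      word-at-a-time string operations (eg 0x80808080, 0x7F7F7F7F and
      0xFEFEFEFF == -0x01010101 show up in optimised strlen-style
      code), so their appearance hints that the block may deliberately
      operate on partially defined words.  That is an observation, not
      a guarantee. */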
4763    return (/*32*/    n == 0xFEFEFEFFULL
4764            /*32*/ || n == 0x80808080ULL
4765            /*32*/ || n == 0x7F7F7F7FULL
4766            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
4767            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
4768            /*64*/ || n == 0x0000000000008080ULL
4769            /*64*/ || n == 0x8080808080808080ULL
4770            /*64*/ || n == 0x0101010101010101ULL
4771           );
4772 }
4773 
4774 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4775 {
4776    Int      i;
4777    IRExpr*  e;
4778    IRDirty* d;
4779    IRCAS*   cas;
4780    switch (st->tag) {
4781       case Ist_WrTmp:
4782          e = st->Ist.WrTmp.data;
4783          switch (e->tag) {
4784             case Iex_Get:
4785             case Iex_RdTmp:
4786                return False;
4787             case Iex_Const:
4788                return isBogusAtom(e);
4789             case Iex_Unop:
4790                return isBogusAtom(e->Iex.Unop.arg);
4791             case Iex_GetI:
4792                return isBogusAtom(e->Iex.GetI.ix);
4793             case Iex_Binop:
4794                return isBogusAtom(e->Iex.Binop.arg1)
4795                       || isBogusAtom(e->Iex.Binop.arg2);
4796             case Iex_Triop:
4797                return isBogusAtom(e->Iex.Triop.arg1)
4798                       || isBogusAtom(e->Iex.Triop.arg2)
4799                       || isBogusAtom(e->Iex.Triop.arg3);
4800             case Iex_Qop:
4801                return isBogusAtom(e->Iex.Qop.arg1)
4802                       || isBogusAtom(e->Iex.Qop.arg2)
4803                       || isBogusAtom(e->Iex.Qop.arg3)
4804                       || isBogusAtom(e->Iex.Qop.arg4);
4805             case Iex_Mux0X:
4806                return isBogusAtom(e->Iex.Mux0X.cond)
4807                       || isBogusAtom(e->Iex.Mux0X.expr0)
4808                       || isBogusAtom(e->Iex.Mux0X.exprX);
4809             case Iex_Load:
4810                return isBogusAtom(e->Iex.Load.addr);
4811             case Iex_CCall:
4812                for (i = 0; e->Iex.CCall.args[i]; i++)
4813                   if (isBogusAtom(e->Iex.CCall.args[i]))
4814                      return True;
4815                return False;
4816             default:
4817                goto unhandled;
4818          }
4819       case Ist_Dirty:
4820          d = st->Ist.Dirty.details;
4821          for (i = 0; d->args[i]; i++)
4822             if (isBogusAtom(d->args[i]))
4823                return True;
4824          if (d->guard && isBogusAtom(d->guard))
4825             return True;
4826          if (d->mAddr && isBogusAtom(d->mAddr))
4827             return True;
4828          return False;
4829       case Ist_Put:
4830          return isBogusAtom(st->Ist.Put.data);
4831       case Ist_PutI:
4832          return isBogusAtom(st->Ist.PutI.ix)
4833                 || isBogusAtom(st->Ist.PutI.data);
4834       case Ist_Store:
4835          return isBogusAtom(st->Ist.Store.addr)
4836                 || isBogusAtom(st->Ist.Store.data);
4837       case Ist_Exit:
4838          return isBogusAtom(st->Ist.Exit.guard);
4839       case Ist_AbiHint:
4840          return isBogusAtom(st->Ist.AbiHint.base)
4841                 || isBogusAtom(st->Ist.AbiHint.nia);
4842       case Ist_NoOp:
4843       case Ist_IMark:
4844       case Ist_MBE:
4845          return False;
4846       case Ist_CAS:
4847          cas = st->Ist.CAS.details;
4848          return isBogusAtom(cas->addr)
4849                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4850                 || isBogusAtom(cas->expdLo)
4851                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4852                 || isBogusAtom(cas->dataLo);
4853       case Ist_LLSC:
4854          return isBogusAtom(st->Ist.LLSC.addr)
4855                 || (st->Ist.LLSC.storedata
4856                        ? isBogusAtom(st->Ist.LLSC.storedata)
4857                        : False);
4858       default:
4859       unhandled:
4860          ppIRStmt(st);
4861          VG_(tool_panic)("hasBogusLiterals");
4862    }
4863 }
4864 
4865 
4866 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
4867                         IRSB* sb_in,
4868                         VexGuestLayout* layout,
4869                         VexGuestExtents* vge,
4870                         IRType gWordTy, IRType hWordTy )
4871 {
4872    Bool    verboze = 0||False;
4873    Bool    bogus;
4874    Int     i, j, first_stmt;
4875    IRStmt* st;
4876    MCEnv   mce;
4877    IRSB*   sb_out;
4878 
4879    if (gWordTy != hWordTy) {
4880       /* We don't currently support this case. */
4881       VG_(tool_panic)("host/guest word size mismatch");
4882    }
4883 
4884    /* Check we're not completely nuts */
4885    tl_assert(sizeof(UWord)  == sizeof(void*));
4886    tl_assert(sizeof(Word)   == sizeof(void*));
4887    tl_assert(sizeof(Addr)   == sizeof(void*));
4888    tl_assert(sizeof(ULong)  == 8);
4889    tl_assert(sizeof(Long)   == 8);
4890    tl_assert(sizeof(Addr64) == 8);
4891    tl_assert(sizeof(UInt)   == 4);
4892    tl_assert(sizeof(Int)    == 4);
4893 
4894    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
4895 
4896    /* Set up SB */
4897    sb_out = deepCopyIRSBExceptStmts(sb_in);
4898 
4899    /* Set up the running environment.  Both .sb and .tmpMap are
4900       modified as we go along.  Note that tmps are added to both
4901       .sb->tyenv and .tmpMap together, so the valid index-set for
4902       those two arrays should always be identical. */
4903    VG_(memset)(&mce, 0, sizeof(mce));
4904    mce.sb             = sb_out;
4905    mce.trace          = verboze;
4906    mce.layout         = layout;
4907    mce.hWordTy        = hWordTy;
4908    mce.bogusLiterals  = False;
4909 
4910    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
4911                             sizeof(TempMapEnt));
4912    for (i = 0; i < sb_in->tyenv->types_used; i++) {
4913       TempMapEnt ent;
4914       ent.kind    = Orig;
4915       ent.shadowV = IRTemp_INVALID;
4916       ent.shadowB = IRTemp_INVALID;
4917       VG_(addToXA)( mce.tmpMap, &ent );
4918    }
4919    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
4920 
4921    /* Make a preliminary inspection of the statements, to see if there
4922       are any dodgy-looking literals.  If there are, we generate
4923       extra-detailed (hence extra-expensive) instrumentation in
4924       places.  Scan the whole bb even if dodginess is found earlier,
4925       so that the flatness assertion is applied to all stmts. */
4926 
4927    bogus = False;
4928 
4929    for (i = 0; i < sb_in->stmts_used; i++) {
4930 
4931       st = sb_in->stmts[i];
4932       tl_assert(st);
4933       tl_assert(isFlatIRStmt(st));
4934 
4935       if (!bogus) {
4936          bogus = checkForBogusLiterals(st);
4937          if (0 && bogus) {
4938             VG_(printf)("bogus: ");
4939             ppIRStmt(st);
4940             VG_(printf)("\n");
4941          }
4942       }
4943 
4944    }
4945 
4946    mce.bogusLiterals = bogus;
4947 
4948    /* Copy verbatim any IR preamble preceding the first IMark */
4949 
4950    tl_assert(mce.sb == sb_out);
4951    tl_assert(mce.sb != sb_in);
4952 
4953    i = 0;
4954    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
4955 
4956       st = sb_in->stmts[i];
4957       tl_assert(st);
4958       tl_assert(isFlatIRStmt(st));
4959 
4960       stmt( 'C', &mce, sb_in->stmts[i] );
4961       i++;
4962    }
4963 
4964    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
4965       cause the IR following the preamble to contain references to IR
4966       temporaries defined in the preamble.  Because the preamble isn't
4967       instrumented, these temporaries don't have any shadows.
4968       Nevertheless uses of them following the preamble will cause
4969       memcheck to generate references to their shadows.  End effect is
4970       to cause IR sanity check failures, due to references to
4971       non-existent shadows.  This is only evident for the complex
4972       preambles used for function wrapping on TOC-afflicted platforms
4973       (ppc64-linux).
4974 
4975       The following loop therefore scans the preamble looking for
4976       assignments to temporaries.  For each one found it creates an
4977       assignment to the corresponding (V) shadow temp, marking it as
4978       'defined'.  This is the same resulting IR as if the main
4979          instrumentation loop below had been applied to the statement
4980       'tmp = CONSTANT'.
4981 
4982       Similarly, if origin tracking is enabled, we must generate an
4983       assignment for the corresponding origin (B) shadow, claiming
4984       no-origin, as appropriate for a defined value.
4985    */
4986    for (j = 0; j < i; j++) {
4987       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
4988          /* findShadowTmpV checks its arg is an original tmp;
4989             no need to assert that here. */
4990          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
4991          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
4992          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
4993          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4994          if (MC_(clo_mc_level) == 3) {
4995             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
4996             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
4997             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4998          }
4999          if (0) {
5000             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5001             ppIRType( ty_v );
5002             VG_(printf)("\n");
5003          }
5004       }
5005    }
5006 
5007    /* Iterate over the remaining stmts to generate instrumentation. */
5008 
5009    tl_assert(sb_in->stmts_used > 0);
5010    tl_assert(i >= 0);
5011    tl_assert(i < sb_in->stmts_used);
5012    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5013 
5014    for (/* use current i*/; i < sb_in->stmts_used; i++) {
5015 
5016       st = sb_in->stmts[i];
5017       first_stmt = sb_out->stmts_used;
5018 
5019       if (verboze) {
5020          VG_(printf)("\n");
5021          ppIRStmt(st);
5022          VG_(printf)("\n");
5023       }
5024 
5025       if (MC_(clo_mc_level) == 3) {
5026          /* See comments on case Ist_CAS below. */
5027          if (st->tag != Ist_CAS)
5028             schemeS( &mce, st );
5029       }
5030 
5031       /* Generate instrumentation code for each stmt ... */
5032 
5033       switch (st->tag) {
5034 
5035          case Ist_WrTmp:
5036             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5037                                expr2vbits( &mce, st->Ist.WrTmp.data) );
5038             break;
5039 
5040          case Ist_Put:
5041             do_shadow_PUT( &mce,
5042                            st->Ist.Put.offset,
5043                            st->Ist.Put.data,
5044                            NULL /* shadow atom */ );
5045             break;
5046 
5047          case Ist_PutI:
5048             do_shadow_PUTI( &mce,
5049                             st->Ist.PutI.descr,
5050                             st->Ist.PutI.ix,
5051                             st->Ist.PutI.bias,
5052                             st->Ist.PutI.data );
5053             break;
5054 
5055          case Ist_Store:
5056             do_shadow_Store( &mce, st->Ist.Store.end,
5057                                    st->Ist.Store.addr, 0/* addr bias */,
5058                                    st->Ist.Store.data,
5059                                    NULL /* shadow data */,
5060                                    NULL/*guard*/ );
5061             break;
5062 
5063          case Ist_Exit:
5064             complainIfUndefined( &mce, st->Ist.Exit.guard );
5065             break;
5066 
5067          case Ist_IMark:
5068             break;
5069 
5070          case Ist_NoOp:
5071          case Ist_MBE:
5072             break;
5073 
5074          case Ist_Dirty:
5075             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5076             break;
5077 
5078          case Ist_AbiHint:
5079             do_AbiHint( &mce, st->Ist.AbiHint.base,
5080                               st->Ist.AbiHint.len,
5081                               st->Ist.AbiHint.nia );
5082             break;
5083 
5084          case Ist_CAS:
5085             do_shadow_CAS( &mce, st->Ist.CAS.details );
5086             /* Note, do_shadow_CAS copies the CAS itself to the output
5087                block, because it needs to add instrumentation both
5088                before and after it.  Hence skip the copy below.  Also
5089                skip the origin-tracking stuff (call to schemeS) above,
5090                since that's all tangled up with it too; do_shadow_CAS
5091                does it all. */
5092             break;
5093 
5094          case Ist_LLSC:
5095             do_shadow_LLSC( &mce,
5096                             st->Ist.LLSC.end,
5097                             st->Ist.LLSC.result,
5098                             st->Ist.LLSC.addr,
5099                             st->Ist.LLSC.storedata );
5100             break;
5101 
5102          default:
5103             VG_(printf)("\n");
5104             ppIRStmt(st);
5105             VG_(printf)("\n");
5106             VG_(tool_panic)("memcheck: unhandled IRStmt");
5107 
5108       } /* switch (st->tag) */
5109 
5110       if (0 && verboze) {
5111          for (j = first_stmt; j < sb_out->stmts_used; j++) {
5112             VG_(printf)("   ");
5113             ppIRStmt(sb_out->stmts[j]);
5114             VG_(printf)("\n");
5115          }
5116          VG_(printf)("\n");
5117       }
5118 
5119       /* ... and finally copy the stmt itself to the output.  Except,
5120          skip the copy of IRCASs; see comments on case Ist_CAS
5121          above. */
5122       if (st->tag != Ist_CAS)
5123          stmt('C', &mce, st);
5124    }
5125 
5126    /* Now we need to complain if the jump target is undefined. */
5127    first_stmt = sb_out->stmts_used;
5128 
5129    if (verboze) {
5130       VG_(printf)("sb_in->next = ");
5131       ppIRExpr(sb_in->next);
5132       VG_(printf)("\n\n");
5133    }
5134 
5135    complainIfUndefined( &mce, sb_in->next );
5136 
5137    if (0 && verboze) {
5138       for (j = first_stmt; j < sb_out->stmts_used; j++) {
5139          VG_(printf)("   ");
5140          ppIRStmt(sb_out->stmts[j]);
5141          VG_(printf)("\n");
5142       }
5143       VG_(printf)("\n");
5144    }
5145 
5146    /* If this fails, there's been some serious snafu with tmp management,
5147       which should be investigated. */
5148    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5149    VG_(deleteXA)( mce.tmpMap );
5150 
5151    tl_assert(mce.sb == sb_out);
5152    return sb_out;
5153 }
5154 
5155 /*------------------------------------------------------------*/
5156 /*--- Post-tree-build final tidying                        ---*/
5157 /*------------------------------------------------------------*/
5158 
5159 /* This exploits the observation that Memcheck often produces
5160    repeated conditional calls of the form
5161 
5162    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5163 
5164    with the same guard expression G guarding the same helper call.
5165    The second and subsequent calls are redundant.  This usually
5166    results from instrumentation of guest code containing multiple
5167    memory references at different constant offsets from the same base
5168    register.  After optimisation of the instrumentation, you get a
5169    test for the definedness of the base register for each memory
5170    reference, which is kinda pointless.  MC_(final_tidy) therefore
5171    looks for such repeated calls and removes all but the first. */
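
/* Illustrative sketch only (guard/temp names invented): given

      if (t_g) DIRTY MC_(helperc_value_check4_fail_no_o)()
      ...
      if (t_g) DIRTY MC_(helperc_value_check4_fail_no_o)()

   MC_(final_tidy) keeps the first call and rewrites the second into an
   IRStmt_NoOp, since a second error report guarded by the same
   condition adds nothing. */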
5172 
5173 /* A struct for recording which (helper, guard) pairs we have already
5174    seen. */
5175 typedef
5176    struct { void* entry; IRExpr* guard; }
5177    Pair;
5178 
5179 /* Return True if e1 and e2 definitely denote the same value (used to
5180    compare guards).  Return False if unknown; False is the safe
5181    answer.  Since guest registers and guest memory do not have the
5182    SSA property we must return False if any Gets or Loads appear in
5183    the expression. */
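
/* Illustrative example (temp names invented): two guards which both
   read as CmpNE32(t_x,0x0:I32) are considered the same provided t_x
   is the same temporary, whereas a guard containing a Get, GetI or
   Load is never considered equal to anything, since guest state and
   memory lack the SSA property. */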
5184 
5185 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5186 {
5187    if (e1->tag != e2->tag)
5188       return False;
5189    switch (e1->tag) {
5190       case Iex_Const:
5191          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5192       case Iex_Binop:
5193          return e1->Iex.Binop.op == e2->Iex.Binop.op
5194                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5195                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5196       case Iex_Unop:
5197          return e1->Iex.Unop.op == e2->Iex.Unop.op
5198                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5199       case Iex_RdTmp:
5200          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5201       case Iex_Mux0X:
5202          return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5203                 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5204                 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5205       case Iex_Qop:
5206       case Iex_Triop:
5207       case Iex_CCall:
5208          /* be lazy.  Could define equality for these, but they never
5209             appear to be used. */
5210          return False;
5211       case Iex_Get:
5212       case Iex_GetI:
5213       case Iex_Load:
5214          /* be conservative - these may not give the same value each
5215             time */
5216          return False;
5217       case Iex_Binder:
5218          /* should never see this */
5219          /* fallthrough */
5220       default:
5221          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5222          ppIRExpr(e1);
5223          VG_(tool_panic)("memcheck:sameIRValue");
5224          return False;
5225    }
5226 }
5227 
5228 /* See if 'pairs' already has an entry for (entry, guard).  Return
5229    True if so.  If not, add an entry. */
5230 
5231 static
5232 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5233 {
5234    Pair  p;
5235    Pair* pp;
5236    Int   i, n = VG_(sizeXA)( pairs );
5237    for (i = 0; i < n; i++) {
5238       pp = VG_(indexXA)( pairs, i );
5239       if (pp->entry == entry && sameIRValue(pp->guard, guard))
5240          return True;
5241    }
5242    p.guard = guard;
5243    p.entry = entry;
5244    VG_(addToXA)( pairs, &p );
5245    return False;
5246 }
5247 
5248 static Bool is_helperc_value_checkN_fail ( HChar* name )
5249 {
5250    return
5251       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5252       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5253       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5254       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5255       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5256       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5257       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5258       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5259 }
5260 
5261 IRSB* MC_(final_tidy) ( IRSB* sb_in )
5262 {
5263    Int i;
5264    IRStmt*   st;
5265    IRDirty*  di;
5266    IRExpr*   guard;
5267    IRCallee* cee;
5268    Bool      alreadyPresent;
5269    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5270                                  VG_(free), sizeof(Pair) );
5271    /* Scan forwards through the statements.  Each time a call to one
5272       of the relevant helpers is seen, check if we have made a
5273       previous call to the same helper using the same guard
5274       expression, and if so, delete the call. */
5275    for (i = 0; i < sb_in->stmts_used; i++) {
5276       st = sb_in->stmts[i];
5277       tl_assert(st);
5278       if (st->tag != Ist_Dirty)
5279          continue;
5280       di = st->Ist.Dirty.details;
5281       guard = di->guard;
5282       if (!guard)
5283          continue;
5284       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5285       cee = di->cee;
5286       if (!is_helperc_value_checkN_fail( cee->name ))
5287          continue;
5288        /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5289           guard 'guard'.  Check if we have already seen a call to this
5290           function with the same guard.  If so, delete it.  If not,
5291           add it to the set of calls we do know about. */
5292       alreadyPresent = check_or_add( pairs, guard, cee->addr );
5293       if (alreadyPresent) {
5294          sb_in->stmts[i] = IRStmt_NoOp();
5295          if (0) VG_(printf)("XX\n");
5296       }
5297    }
5298    VG_(deleteXA)( pairs );
5299    return sb_in;
5300 }
5301 
5302 
5303 /*------------------------------------------------------------*/
5304 /*--- Origin tracking stuff                                ---*/
5305 /*------------------------------------------------------------*/
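
/* A note on conventions in the functions below: each origin ("B")
   shadow value is a 32-bit tag (Ity_I32), with zero meaning "no
   origin information" (as used for constants and other always-defined
   values).  Tags are combined pessimistically using Iop_Max32U (see
   gen_maxU32), and are read from and written to the origin shadow map
   via the MC_(helperc_b_load*) and MC_(helperc_b_store*) dirty
   helpers. */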
5306 
5307 /* Almost identical to findShadowTmpV. */
5308 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5309 {
5310    TempMapEnt* ent;
5311    /* VG_(indexXA) range-checks 'orig', hence no need to check
5312       here. */
5313    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5314    tl_assert(ent->kind == Orig);
5315    if (ent->shadowB == IRTemp_INVALID) {
5316       IRTemp tmpB
5317         = newTemp( mce, Ity_I32, BSh );
5318       /* newTemp may cause mce->tmpMap to resize, hence previous results
5319          from VG_(indexXA) are invalid. */
5320       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5321       tl_assert(ent->kind == Orig);
5322       tl_assert(ent->shadowB == IRTemp_INVALID);
5323       ent->shadowB = tmpB;
5324    }
5325    return ent->shadowB;
5326 }
5327 
5328 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5329 {
5330    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5331 }
5332 
5333 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5334                             IRAtom* baseaddr, Int offset )
5335 {
5336    void*    hFun;
5337    HChar*   hName;
5338    IRTemp   bTmp;
5339    IRDirty* di;
5340    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5341    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5342    IRAtom*  ea    = baseaddr;
5343    if (offset != 0) {
5344       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5345                                    : mkU64( (Long)(Int)offset );
5346       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5347    }
5348    bTmp = newTemp(mce, mce->hWordTy, BSh);
5349 
5350    switch (szB) {
5351       case 1: hFun  = (void*)&MC_(helperc_b_load1);
5352               hName = "MC_(helperc_b_load1)";
5353               break;
5354       case 2: hFun  = (void*)&MC_(helperc_b_load2);
5355               hName = "MC_(helperc_b_load2)";
5356               break;
5357       case 4: hFun  = (void*)&MC_(helperc_b_load4);
5358               hName = "MC_(helperc_b_load4)";
5359               break;
5360       case 8: hFun  = (void*)&MC_(helperc_b_load8);
5361               hName = "MC_(helperc_b_load8)";
5362               break;
5363       case 16: hFun  = (void*)&MC_(helperc_b_load16);
5364                hName = "MC_(helperc_b_load16)";
5365                break;
5366       default:
5367          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5368          tl_assert(0);
5369    }
5370    di = unsafeIRDirty_1_N(
5371            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5372            mkIRExprVec_1( ea )
5373         );
5374    /* no need to mess with any annotations.  This call accesses
5375       neither guest state nor guest memory. */
5376    stmt( 'B', mce, IRStmt_Dirty(di) );
5377    if (mce->hWordTy == Ity_I64) {
5378       /* 64-bit host */
5379       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5380       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5381       return mkexpr(bTmp32);
5382    } else {
5383       /* 32-bit host */
5384       return mkexpr(bTmp);
5385    }
5386 }
5387 
5388 /* Generate a shadow store.  guard :: Ity_I1 controls whether the
5389    store really happens; NULL means it unconditionally does. */
5390 static void gen_store_b ( MCEnv* mce, Int szB,
5391                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
5392                           IRAtom* guard )
5393 {
5394    void*    hFun;
5395    HChar*   hName;
5396    IRDirty* di;
5397    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5398    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5399    IRAtom*  ea    = baseaddr;
5400    if (guard) {
5401       tl_assert(isOriginalAtom(mce, guard));
5402       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5403    }
5404    if (offset != 0) {
5405       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5406                                    : mkU64( (Long)(Int)offset );
5407       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5408    }
5409    if (mce->hWordTy == Ity_I64)
5410       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5411 
5412    switch (szB) {
5413       case 1: hFun  = (void*)&MC_(helperc_b_store1);
5414               hName = "MC_(helperc_b_store1)";
5415               break;
5416       case 2: hFun  = (void*)&MC_(helperc_b_store2);
5417               hName = "MC_(helperc_b_store2)";
5418               break;
5419       case 4: hFun  = (void*)&MC_(helperc_b_store4);
5420               hName = "MC_(helperc_b_store4)";
5421               break;
5422       case 8: hFun  = (void*)&MC_(helperc_b_store8);
5423               hName = "MC_(helperc_b_store8)";
5424               break;
5425       case 16: hFun  = (void*)&MC_(helperc_b_store16);
5426                hName = "MC_(helperc_b_store16)";
5427                break;
5428       default:
5429          tl_assert(0);
5430    }
5431    di = unsafeIRDirty_0_N( 2/*regparms*/,
5432            hName, VG_(fnptr_to_fnentry)( hFun ),
5433            mkIRExprVec_2( ea, dataB )
5434         );
5435    /* no need to mess with any annotations.  This call accesses
5436       neither guest state nor guest memory. */
5437    if (guard) di->guard = guard;
5438    stmt( 'B', mce, IRStmt_Dirty(di) );
5439 }
5440 
5441 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5442    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5443    if (eTy == Ity_I64)
5444       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5445    if (eTy == Ity_I32)
5446       return e;
5447    tl_assert(0);
5448 }
5449 
5450 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5451    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5452    tl_assert(eTy == Ity_I32);
5453    if (dstTy == Ity_I64)
5454       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5455    tl_assert(0);
5456 }
5457 
5458 
5459 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5460 {
5461    tl_assert(MC_(clo_mc_level) == 3);
5462 
5463    switch (e->tag) {
5464 
5465       case Iex_GetI: {
5466          IRRegArray* descr_b;
5467          IRAtom      *t1, *t2, *t3, *t4;
5468          IRRegArray* descr      = e->Iex.GetI.descr;
5469          IRType equivIntTy
5470             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5471          /* If this array is unshadowable for whatever reason, use the
5472             usual approximation. */
5473          if (equivIntTy == Ity_INVALID)
5474             return mkU32(0);
5475          tl_assert(sizeofIRType(equivIntTy) >= 4);
5476          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5477          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5478                                  equivIntTy, descr->nElems );
5479          /* Do a shadow indexed get of the same size, giving t1.  Take
5480             the bottom 32 bits of it, giving t2.  Compute into t3 the
5481             origin for the index (almost certainly zero, but there's
5482             no harm in being completely general here, since iropt will
5483             remove any useless code), and fold it in, giving a final
5484             value t4. */
5485          t1 = assignNew( 'B', mce, equivIntTy,
5486                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5487                                                 e->Iex.GetI.bias ));
5488          t2 = narrowTo32( mce, t1 );
5489          t3 = schemeE( mce, e->Iex.GetI.ix );
5490          t4 = gen_maxU32( mce, t2, t3 );
5491          return t4;
5492       }
5493       case Iex_CCall: {
5494          Int i;
5495          IRAtom*  here;
5496          IRExpr** args = e->Iex.CCall.args;
5497          IRAtom*  curr = mkU32(0);
5498          for (i = 0; args[i]; i++) {
5499             tl_assert(i < 32);
5500             tl_assert(isOriginalAtom(mce, args[i]));
5501             /* Only take notice of this arg if the callee's
5502                mc-exclusion mask does not say it is to be excluded. */
5503             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5504                /* the arg is to be excluded from definedness checking.
5505                   Do nothing. */
5506                if (0) VG_(printf)("excluding %s(%d)\n",
5507                                   e->Iex.CCall.cee->name, i);
5508             } else {
5509                /* calculate the arg's definedness, and pessimistically
5510                   merge it in. */
5511                here = schemeE( mce, args[i] );
5512                curr = gen_maxU32( mce, curr, here );
5513             }
5514          }
5515          return curr;
5516       }
5517       case Iex_Load: {
5518          Int dszB;
5519          dszB = sizeofIRType(e->Iex.Load.ty);
5520          /* assert that the B value for the address is already
5521             available (somewhere) */
5522          tl_assert(isIRAtom(e->Iex.Load.addr));
5523          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5524          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5525       }
5526       case Iex_Mux0X: {
5527          IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5528          IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5529          IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5530          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5531       }
5532       case Iex_Qop: {
5533          IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5534          IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5535          IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5536          IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5537          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5538                                  gen_maxU32( mce, b3, b4 ) );
5539       }
5540       case Iex_Triop: {
5541          IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5542          IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5543          IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5544          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5545       }
5546       case Iex_Binop: {
5547          switch (e->Iex.Binop.op) {
5548             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
5549             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5550             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5551             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5552                /* Just say these all produce a defined result,
5553                   regardless of their arguments.  See
5554                   COMMENT_ON_CasCmpEQ in this file. */
5555                return mkU32(0);
5556             default: {
5557                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5558                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5559                return gen_maxU32( mce, b1, b2 );
5560             }
5561          }
5562          tl_assert(0);
5563          /*NOTREACHED*/
5564       }
5565       case Iex_Unop: {
5566          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5567          return b1;
5568       }
5569       case Iex_Const:
5570          return mkU32(0);
5571       case Iex_RdTmp:
5572          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5573       case Iex_Get: {
5574          Int b_offset = MC_(get_otrack_shadow_offset)(
5575                            e->Iex.Get.offset,
5576                            sizeofIRType(e->Iex.Get.ty)
5577                         );
5578          tl_assert(b_offset >= -1
5579                    && b_offset <= mce->layout->total_sizeB -4);
5580          if (b_offset >= 0) {
5581             /* FIXME: this isn't an atom! */
5582             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5583                                Ity_I32 );
5584          }
5585          return mkU32(0);
5586       }
5587       default:
5588          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5589          ppIRExpr(e);
5590          VG_(tool_panic)("memcheck:schemeE");
5591    }
5592 }
5593 
5594 
5595 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5596 {
5597    // This is a hacked version of do_shadow_Dirty
5598    Int       i, n, toDo, gSz, gOff;
5599    IRAtom    *here, *curr;
5600    IRTemp    dst;
5601 
5602    /* First check the guard. */
5603    curr = schemeE( mce, d->guard );
5604 
5605    /* Now round up all inputs and maxU32 over them. */
5606 
5607    /* Inputs: unmasked args */
5608    for (i = 0; d->args[i]; i++) {
5609       if (d->cee->mcx_mask & (1<<i)) {
5610          /* ignore this arg */
5611       } else {
5612          here = schemeE( mce, d->args[i] );
5613          curr = gen_maxU32( mce, curr, here );
5614       }
5615    }
5616 
5617    /* Inputs: guest state that we read. */
5618    for (i = 0; i < d->nFxState; i++) {
5619       tl_assert(d->fxState[i].fx != Ifx_None);
5620       if (d->fxState[i].fx == Ifx_Write)
5621          continue;
5622 
5623       /* Ignore any sections marked as 'always defined'. */
5624       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5625          if (0)
5626          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5627                      d->fxState[i].offset, d->fxState[i].size );
5628          continue;
5629       }
5630 
5631       /* This state element is read or modified.  So we need to
5632          consider it.  If larger than 4 bytes, deal with it in 4-byte
5633          chunks. */
5634       gSz  = d->fxState[i].size;
5635       gOff = d->fxState[i].offset;
5636       tl_assert(gSz > 0);
5637       while (True) {
5638          Int b_offset;
5639          if (gSz == 0) break;
5640          n = gSz <= 4 ? gSz : 4;
5641          /* update 'curr' with maxU32 of the state slice
5642             gOff .. gOff+n-1 */
5643          b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5644          if (b_offset != -1) {
5645             here = assignNew( 'B',mce,
5646                                Ity_I32,
5647                                IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5648                                           Ity_I32));
5649             curr = gen_maxU32( mce, curr, here );
5650          }
5651          gSz -= n;
5652          gOff += n;
5653       }
5654 
5655    }
5656 
5657    /* Inputs: memory */
5658 
5659    if (d->mFx != Ifx_None) {
5660       /* Because we may do multiple shadow loads/stores from the same
5661          base address, it's best to do a single test of its
5662          definedness right now.  Post-instrumentation optimisation
5663          should remove all but this test. */
5664       tl_assert(d->mAddr);
5665       here = schemeE( mce, d->mAddr );
5666       curr = gen_maxU32( mce, curr, here );
5667    }
5668 
5669    /* Deal with memory inputs (reads or modifies) */
5670    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5671       toDo   = d->mSize;
5672       /* chew off 32-bit chunks.  We don't care about the endianness
5673          since it's all going to be condensed down to a single bit,
5674          but nevertheless choose an endianness which is hopefully
5675          native to the platform. */
5676       while (toDo >= 4) {
5677          here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5678          curr = gen_maxU32( mce, curr, here );
5679          toDo -= 4;
5680       }
5681       /* handle possible 16-bit excess */
5682       while (toDo >= 2) {
5683          here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5684          curr = gen_maxU32( mce, curr, here );
5685          toDo -= 2;
5686       }
5687       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5688    }
5689 
5690    /* Whew!  So curr is a 32-bit B-value which should give an origin
5691       of some use if any of the inputs to the helper are undefined.
5692       Now we need to re-distribute the results to all destinations. */
5693 
5694    /* Outputs: the destination temporary, if there is one. */
5695    if (d->tmp != IRTemp_INVALID) {
5696       dst   = findShadowTmpB(mce, d->tmp);
5697       assign( 'B', mce, dst, curr );
5698    }
5699 
5700    /* Outputs: guest state that we write or modify. */
5701    for (i = 0; i < d->nFxState; i++) {
5702       tl_assert(d->fxState[i].fx != Ifx_None);
5703       if (d->fxState[i].fx == Ifx_Read)
5704          continue;
5705 
5706       /* Ignore any sections marked as 'always defined'. */
5707       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5708          continue;
5709 
5710       /* This state element is written or modified.  So we need to
5711          consider it.  If larger than 4 bytes, deal with it in 4-byte
5712          chunks. */
5713       gSz  = d->fxState[i].size;
5714       gOff = d->fxState[i].offset;
5715       tl_assert(gSz > 0);
5716       while (True) {
5717          Int b_offset;
5718          if (gSz == 0) break;
5719          n = gSz <= 4 ? gSz : 4;
5720          /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5721          b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5722          if (b_offset != -1) {
5723            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5724                                       curr ));
5725          }
5726          gSz -= n;
5727          gOff += n;
5728       }
5729    }
5730 
5731    /* Outputs: memory that we write or modify.  Same comments about
5732       endianness as above apply. */
5733    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5734       toDo   = d->mSize;
5735       /* chew off 32-bit chunks */
5736       while (toDo >= 4) {
5737          gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5738                       NULL/*guard*/ );
5739          toDo -= 4;
5740       }
5741       /* handle possible 16-bit excess */
5742       while (toDo >= 2) {
5743         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5744                      NULL/*guard*/ );
5745          toDo -= 2;
5746       }
5747       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5748    }
5749 }
5750 
5751 
5752 static void do_origins_Store ( MCEnv* mce,
5753                                IREndness stEnd,
5754                                IRExpr* stAddr,
5755                                IRExpr* stData )
5756 {
5757    Int     dszB;
5758    IRAtom* dataB;
5759    /* assert that the B value for the address is already available
5760       (somewhere), since the call to schemeE will want to see it.
5761       XXXX how does this actually ensure that?? */
5762    tl_assert(isIRAtom(stAddr));
5763    tl_assert(isIRAtom(stData));
5764    dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5765    dataB = schemeE( mce, stData );
5766    gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5767                      NULL/*guard*/ );
5768 }
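/* Illustrative sketch, not part of the original source: for a store
   of an Ity_I64 value, dszB comes out as 8 and the single call
      gen_store_b( mce, 8, stAddr, 0, dataB, NULL )
   writes the origin of the stored data to the shadow of the eight
   addressed bytes.  Note that stEnd is accepted but not consulted
   here, presumably because a single origin tag covers the stored
   range regardless of byte order. */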
5769 
5770 
5771 static void schemeS ( MCEnv* mce, IRStmt* st )
5772 {
5773    tl_assert(MC_(clo_mc_level) == 3);
5774 
5775    switch (st->tag) {
5776 
5777       case Ist_AbiHint:
5778          /* The value-check instrumenter handles this - by arranging
5779             to pass the address of the next instruction to
5780             MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
5781             happen for origin tracking w.r.t. AbiHints.  So there is
5782             nothing to do here. */
5783          break;
5784 
5785       case Ist_PutI: {
5786          IRRegArray* descr_b;
5787          IRAtom      *t1, *t2, *t3, *t4;
5788          IRRegArray* descr = st->Ist.PutI.descr;
5789          IRType equivIntTy
5790             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5791          /* If this array is unshadowable for whatever reason,
5792             generate no code. */
5793          if (equivIntTy == Ity_INVALID)
5794             break;
5795          tl_assert(sizeofIRType(equivIntTy) >= 4);
5796          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5797          descr_b
5798             = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5799                             equivIntTy, descr->nElems );
5800          /* Compute a value to Put - the conjoinment of the origin for
5801             the data to be Put-ted (obviously) and of the index value
5802             (not so obviously). */
5803          t1 = schemeE( mce, st->Ist.PutI.data );
5804          t2 = schemeE( mce, st->Ist.PutI.ix );
5805          t3 = gen_maxU32( mce, t1, t2 );
5806          t4 = zWidenFrom32( mce, equivIntTy, t3 );
5807          stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5808                                       st->Ist.PutI.bias, t4 ));
5809          break;
5810       }
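      /* Illustrative sketch, not part of the original source: for a
         hypothetical shadowable I64 register array, equivIntTy would
         be Ity_I64 and the statements built above amount to
            t3 = max( origin(data), origin(ix) )
            PutI( shadow descr, ix, bias, zero-widen-to-I64(t3) )
         Folding in the index's origin is presumably needed because an
         undefined index makes it unknowable which element was
         actually written. */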
5811 
5812       case Ist_Dirty:
5813          do_origins_Dirty( mce, st->Ist.Dirty.details );
5814          break;
5815 
5816       case Ist_Store:
5817          do_origins_Store( mce, st->Ist.Store.end,
5818                                 st->Ist.Store.addr,
5819                                 st->Ist.Store.data );
5820          break;
5821 
5822       case Ist_LLSC: {
5823          /* In short: treat a load-linked like a normal load followed
5824             by an assignment of the loaded (shadow) data to the result
5825             temporary.  Treat a store-conditional like a normal store,
5826             and mark the result temporary as defined. */
5827          if (st->Ist.LLSC.storedata == NULL) {
5828             /* Load Linked */
5829             IRType resTy
5830                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5831             IRExpr* vanillaLoad
5832                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5833             tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5834                       || resTy == Ity_I16 || resTy == Ity_I8);
5835             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5836                               schemeE(mce, vanillaLoad));
5837          } else {
5838             /* Store conditional */
5839             do_origins_Store( mce, st->Ist.LLSC.end,
5840                                    st->Ist.LLSC.addr,
5841                                    st->Ist.LLSC.storedata );
5842             /* For the rationale behind this, see comments at the
5843                place where the V-shadow for .result is constructed, in
5844                do_shadow_LLSC.  In short, we regard .result as
5845                always-defined. */
5846             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5847                               mkU32(0) );
5848          }
5849          break;
5850       }
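      /* Illustrative sketch, not part of the original source: for a
         hypothetical 32-bit LL/SC pair,
         - the load-linked arm builds IRExpr_Load(end, Ity_I32, addr)
           and assigns schemeE() of that load to the B shadow of
           .result, exactly as a plain 32-bit load would be treated;
         - the store-conditional arm goes through do_origins_Store and
           then sets the B shadow of .result to mkU32(0), i.e. the
           success/failure flag is treated as carrying no interesting
           origin. */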
5851 
5852       case Ist_Put: {
5853          Int b_offset
5854             = MC_(get_otrack_shadow_offset)(
5855                  st->Ist.Put.offset,
5856                  sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
5857               );
5858          if (b_offset >= 0) {
5859             /* FIXME: this isn't an atom! */
5860             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5861                                        schemeE( mce, st->Ist.Put.data )) );
5862          }
5863          break;
5864       }
5865 
5866       case Ist_WrTmp:
5867          assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5868                            schemeE(mce, st->Ist.WrTmp.data) );
5869          break;
5870 
5871       case Ist_MBE:
5872       case Ist_NoOp:
5873       case Ist_Exit:
5874       case Ist_IMark:
5875          break;
5876 
5877       default:
5878          VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5879          ppIRStmt(st);
5880          VG_(tool_panic)("memcheck:schemeS");
5881    }
5882 }
5883 
5884 
5885 /*--------------------------------------------------------------------*/
5886 /*--- end                                           mc_translate.c ---*/
5887 /*--------------------------------------------------------------------*/
5888