1 
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations.         ---*/
4 /*---                                               mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of MemCheck, a heavyweight Valgrind tool for
9    detecting memory errors.
10 
11    Copyright (C) 2000-2012 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h"     // For mc_include.h
34 #include "pub_tool_hashtable.h"     // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
42 
43 #include "mc_include.h"
44 
45 
46 /* FIXMEs JRS 2011-June-16.
47 
48    Check the interpretation for vector narrowing and widening ops,
49    particularly the saturating ones.  I suspect they are overly
50    pessimistic and/or wrong.
51 */
52 
53 /* This file implements the Memcheck instrumentation, and in
54    particular contains the core of its undefined value detection
55    machinery.  For a comprehensive background of the terminology,
56    algorithms and rationale used herein, read:
57 
58      Using Valgrind to detect undefined value errors with
59      bit-precision
60 
61      Julian Seward and Nicholas Nethercote
62 
63      2005 USENIX Annual Technical Conference (General Track),
64      Anaheim, CA, USA, April 10-15, 2005.
65 
66    ----
67 
68    Here is as good a place as any to record exactly when V bits are and
69    should be checked, why, and what function is responsible.
70 
71 
72    Memcheck complains when an undefined value is used:
73 
74    1. In the condition of a conditional branch.  Because it could cause
75       incorrect control flow, and thus cause incorrect externally-visible
76       behaviour.  [mc_translate.c:complainIfUndefined]
77 
78    2. As an argument to a system call, or as the value that specifies
79       the system call number.  Because it could cause an incorrect
80       externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
81 
82    3. As the address in a load or store.  Because it could cause an
83       incorrect value to be used later, which could cause externally-visible
84       behaviour (eg. via incorrect control flow or an incorrect system call
85       argument)  [complainIfUndefined]
86 
87    4. As the target address of a branch.  Because it could cause incorrect
88       control flow.  [complainIfUndefined]
89 
90    5. As an argument to setenv, unsetenv, or putenv.  Because it could put
91       an incorrect value into the external environment.
92       [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93 
94    6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
95       [complainIfUndefined]
96 
97    7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98       VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
99       requested it.  [in memcheck.h]
100 
101 
102    Memcheck also complains, but should not, when an undefined value is used:
103 
104    8. As the shift value in certain SIMD shift operations (but not in the
105       standard integer shift operations).  This inconsistency is due to
106       historical reasons.  [complainIfUndefined]
107 
108 
109    Memcheck does not complain, but should, when an undefined value is used:
110 
111    9. As an input to a client request.  Because the client request may
112       affect the visible behaviour -- see bug #144362 for an example
113       involving the malloc replacements in vg_replace_malloc.c and
114       VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115       isn't identified.  That bug report also has some info on how to solve
116       the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117 
118 
119    In practice, 1 and 2 account for the vast majority of cases.
120 */
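/* For illustration (a schematic user-level fragment, not part of this
   file), cases 1 and 2 above are what fire for code like:

      int x;             // never initialised
      if (x > 42)        // case 1: branch condition carries undefined bits
         close(x);       // case 2: undefined value used as a syscall argument

   In both places the instrumentation emits a call to a helper that
   reports the error if the relevant V bits are not all zero (defined). */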
121 
122 /*------------------------------------------------------------*/
123 /*--- Forward decls                                        ---*/
124 /*------------------------------------------------------------*/
125 
126 struct _MCEnv;
127 
128 static IRType  shadowTypeV ( IRType ty );
129 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
130 static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
131 
132 static IRExpr *i128_const_zero(void);
133 
134 /*------------------------------------------------------------*/
135 /*--- Memcheck running state, and tmp management.          ---*/
136 /*------------------------------------------------------------*/
137 
138 /* Carries info about a particular tmp.  The tmp's number is not
139    recorded, as this is implied by (equal to) its index in the tmpMap
140    in MCEnv.  The tmp's type is also not recorded, as this is present
141    in MCEnv.sb->tyenv.
142 
143    When .kind is Orig, .shadowV and .shadowB may give the identities
144    of the temps currently holding the associated definedness (shadowV)
145    and origin (shadowB) values, or these may be IRTemp_INVALID if code
146    to compute such values has not yet been emitted.
147 
148    When .kind is VSh or BSh then the tmp holds a V- or B- value,
149    and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150    illogical for a shadow tmp itself to be shadowed.
151 */
152 typedef
153    enum { Orig=1, VSh=2, BSh=3 }
154    TempKind;
155 
156 typedef
157    struct {
158       TempKind kind;
159       IRTemp   shadowV;
160       IRTemp   shadowB;
161    }
162    TempMapEnt;
163 
164 
165 /* Carries around state during memcheck instrumentation. */
166 typedef
167    struct _MCEnv {
168       /* MODIFIED: the superblock being constructed.  IRStmts are
169          added. */
170       IRSB* sb;
171       Bool  trace;
172 
173       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
174          current kind and possibly shadow temps for each temp in the
175          IRSB being constructed.  Note that it does not contain the
176          type of each tmp.  If you want to know the type, look at the
177          relevant entry in sb->tyenv.  It follows that at all times
178          during the instrumentation process, the valid indices for
179          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
180          the total number of Orig, V- and B- temps allocated so far.
181 
182          The reason for this strange split (types in one place, all
183          other info in another) is that we need the types to be
184          attached to sb so as to make it possible to do
185          "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
186          instrumentation process. */
187       XArray* /* of TempMapEnt */ tmpMap;
188 
189       /* MODIFIED: indicates whether "bogus" literals have so far been
190          found.  Starts off False, and may change to True. */
191       Bool bogusLiterals;
192 
193       /* READONLY: indicates whether we should use expensive
194          interpretations of integer adds, since unfortunately LLVM
195          uses them to do ORs in some circumstances.  Defaulted to True
196          on MacOS and False everywhere else. */
197       Bool useLLVMworkarounds;
198 
199       /* READONLY: the guest layout.  This indicates which parts of
200          the guest state should be regarded as 'always defined'. */
201       VexGuestLayout* layout;
202 
203       /* READONLY: the host word type.  Needed for constructing
204          arguments of type 'HWord' to be passed to helper functions.
205          Ity_I32 or Ity_I64 only. */
206       IRType hWordTy;
207    }
208    MCEnv;
209 
210 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
211    demand), as they are encountered.  This is for two reasons.
212 
213    (1) (less important reason): Many original tmps are unused due to
214    initial IR optimisation, and we do not want to waste space in tables
215    tracking them.
216 
217    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
218    table indexed [0 .. n_temps-1], which gives the current shadow for
219    each original tmp, or IRTemp_INVALID if none is so far assigned.
220    It is necessary to support making multiple assignments to a shadow
221    -- specifically, after testing a shadow for definedness, it needs
222    to be made defined.  But IR's SSA property disallows this.
223 
224    (2) (more important reason): Therefore, when a shadow needs to get
225    a new value, a new temporary is created, the value is assigned to
226    that, and the tmpMap is updated to reflect the new binding.
227 
228    A corollary is that if the tmpMap maps a given tmp to
229    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
230    there's a read-before-write error in the original tmps.  The IR
231    sanity checker should catch all such anomalies, however.
232 */
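/* As a schematic illustration of the lazy scheme (tmp names invented
   for the example):

      t7  = Add32(t5,t6)        -- original code
      v9  = <V bits for t7>     -- findShadowTmpV(t7) allocates v9 on demand
      ... test v9, maybe complain ...
      v12 = all-zeroes          -- newShadowTmpV(t7) rebinds t7's shadow
                                -- to a fresh, now 'defined', tmp v12

   Allocating the fresh v12, rather than re-assigning v9, is what keeps
   the instrumented IR in SSA form. */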
233 
234 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235    both the table in mce->sb and to our auxiliary mapping.  Note that
236    newTemp may cause mce->tmpMap to resize, hence previous results
237    from VG_(indexXA)(mce->tmpMap) are invalidated. */
238 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239 {
240    Word       newIx;
241    TempMapEnt ent;
242    IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
243    ent.kind    = kind;
244    ent.shadowV = IRTemp_INVALID;
245    ent.shadowB = IRTemp_INVALID;
246    newIx = VG_(addToXA)( mce->tmpMap, &ent );
247    tl_assert(newIx == (Word)tmp);
248    return tmp;
249 }
250 
251 
252 /* Find the tmp currently shadowing the given original tmp.  If none
253    so far exists, allocate one.  */
254 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
255 {
256    TempMapEnt* ent;
257    /* VG_(indexXA) range-checks 'orig', hence no need to check
258       here. */
259    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260    tl_assert(ent->kind == Orig);
261    if (ent->shadowV == IRTemp_INVALID) {
262       IRTemp tmpV
263         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264       /* newTemp may cause mce->tmpMap to resize, hence previous results
265          from VG_(indexXA) are invalid. */
266       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267       tl_assert(ent->kind == Orig);
268       tl_assert(ent->shadowV == IRTemp_INVALID);
269       ent->shadowV = tmpV;
270    }
271    return ent->shadowV;
272 }
273 
274 /* Allocate a new shadow for the given original tmp.  This means any
275    previous shadow is abandoned.  This is needed because it is
276    necessary to give a new value to a shadow once it has been tested
277    for undefinedness, but unfortunately IR's SSA property disallows
278    this.  Instead we must abandon the old shadow, allocate a new one
279    and use that instead.
280 
281    This is the same as findShadowTmpV, except we don't bother to see
282    if a shadow temp already existed -- we simply allocate a new one
283    regardless. */
284 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
285 {
286    TempMapEnt* ent;
287    /* VG_(indexXA) range-checks 'orig', hence no need to check
288       here. */
289    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290    tl_assert(ent->kind == Orig);
291    if (1) {
292       IRTemp tmpV
293         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294       /* newTemp may cause mce->tmpMap to resize, hence previous results
295          from VG_(indexXA) are invalid. */
296       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297       tl_assert(ent->kind == Orig);
298       ent->shadowV = tmpV;
299    }
300 }
301 
302 
303 /*------------------------------------------------------------*/
304 /*--- IRAtoms -- a subset of IRExprs                       ---*/
305 /*------------------------------------------------------------*/
306 
307 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
308    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
309    input, most of this code deals in atoms.  Usefully, a value atom
310    always has a V-value which is also an atom: constants are shadowed
311    by constants, and temps are shadowed by the corresponding shadow
312    temporary. */
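/* For example, a constant atom is shadowed by an all-zeroes (fully
   defined) constant of the same shadow type, while IRExpr_RdTmp(t) is
   shadowed by an IRExpr_RdTmp of the tmp returned by
   findShadowTmpV(mce, t). */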
313 
314 typedef  IRExpr  IRAtom;
315 
316 /* (used for sanity checks only): is this an atom which looks
317    like it's from original code? */
318 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319 {
320    if (a1->tag == Iex_Const)
321       return True;
322    if (a1->tag == Iex_RdTmp) {
323       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324       return ent->kind == Orig;
325    }
326    return False;
327 }
328 
329 /* (used for sanity checks only): is this an atom which looks
330    like it's from shadow code? */
331 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332 {
333    if (a1->tag == Iex_Const)
334       return True;
335    if (a1->tag == Iex_RdTmp) {
336       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337       return ent->kind == VSh || ent->kind == BSh;
338    }
339    return False;
340 }
341 
342 /* (used for sanity checks only): check that both args are atoms and
343    are identically-kinded. */
344 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345 {
346    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
347       return True;
348    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
349       return True;
350    return False;
351 }
352 
353 
354 /*------------------------------------------------------------*/
355 /*--- Type management                                      ---*/
356 /*------------------------------------------------------------*/
357 
358 /* Shadow state is always accessed using integer types.  This returns
359    an integer type with the same size (as per sizeofIRType) as the
360    given type.  The only valid shadow types are Bit, I8, I16, I32,
361    I64, I128, V128, V256. */
362 
363 static IRType shadowTypeV ( IRType ty )
364 {
365    switch (ty) {
366       case Ity_I1:
367       case Ity_I8:
368       case Ity_I16:
369       case Ity_I32:
370       case Ity_I64:
371       case Ity_I128: return ty;
372       case Ity_F32:  return Ity_I32;
373       case Ity_D32:  return Ity_I32;
374       case Ity_F64:  return Ity_I64;
375       case Ity_D64:  return Ity_I64;
376       case Ity_F128: return Ity_I128;
377       case Ity_D128: return Ity_I128;
378       case Ity_V128: return Ity_V128;
379       case Ity_V256: return Ity_V256;
380       default: ppIRType(ty);
381                VG_(tool_panic)("memcheck:shadowTypeV");
382    }
383 }
384 
385 /* Produce a 'defined' value of the given shadow type.  Should only be
386    supplied shadow types (Bit/I8/I16/I32/I64/I128/V128). */
387 static IRExpr* definedOfType ( IRType ty ) {
388    switch (ty) {
389       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
390       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
391       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
392       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
393       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
394       case Ity_I128: return i128_const_zero();
395       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
396       default:       VG_(tool_panic)("memcheck:definedOfType");
397    }
398 }
399 
400 
401 /*------------------------------------------------------------*/
402 /*--- Constructing IR fragments                            ---*/
403 /*------------------------------------------------------------*/
404 
405 /* add stmt to a bb */
406 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
407    if (mce->trace) {
408       VG_(printf)("  %c: ", cat);
409       ppIRStmt(st);
410       VG_(printf)("\n");
411    }
412    addStmtToIRSB(mce->sb, st);
413 }
414 
415 /* assign value to tmp */
416 static inline
417 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
418    stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
419 }
420 
421 /* build various kinds of expressions */
422 #define triop(_op, _arg1, _arg2, _arg3) \
423                                  IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
424 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
426 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
427 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
428 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
429 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
430 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
431 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
432 
433 /* Bind the given expression to a new temporary, and return the
434    temporary.  This effectively converts an arbitrary expression into
435    an atom.
436 
437    'ty' is the type of 'e' and hence the type that the new temporary
438    needs to be.  But passing it in is redundant, since we can deduce
439    the type merely by inspecting 'e'.  So at least use that fact to
440    assert that the two types agree. */
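/* For example (schematically -- the local name is invented here),
   mkImproveOR32 further down flattens Or32(Not32(data), vbits) into
   two atoms:

      IRAtom* notData = assignNew('V', mce, Ity_I32, unop(Iop_Not32, data));
      ...               assignNew('V', mce, Ity_I32,
                                  binop(Iop_Or32, notData, vbits));

   with each call appending one WrTmp statement to mce->sb and returning
   an atom that later expressions can refer to. */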
441 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442 {
443    TempKind k;
444    IRTemp   t;
445    IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
446 
447    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
448    switch (cat) {
449       case 'V': k = VSh;  break;
450       case 'B': k = BSh;  break;
451       case 'C': k = Orig; break;
452                 /* happens when we are making up new "orig"
453                    expressions, for IRCAS handling */
454       default: tl_assert(0);
455    }
456    t = newTemp(mce, ty, k);
457    assign(cat, mce, t, e);
458    return mkexpr(t);
459 }
460 
461 
462 /*------------------------------------------------------------*/
463 /*--- Helper functions for 128-bit ops                     ---*/
464 /*------------------------------------------------------------*/
465 
466 static IRExpr *i128_const_zero(void)
467 {
468    IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469    return binop(Iop_64HLto128, z64, z64);
470 }
471 
472 /* There are no I128-bit loads and/or stores [as generated by any
473    current front ends].  So we do not need to worry about that in
474    expr2vbits_Load */
475 
476 
477 /*------------------------------------------------------------*/
478 /*--- Constructing definedness primitive ops               ---*/
479 /*------------------------------------------------------------*/
480 
481 /* --------- Defined-if-either-defined --------- */
482 
483 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484    tl_assert(isShadowAtom(mce,a1));
485    tl_assert(isShadowAtom(mce,a2));
486    return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
487 }
488 
489 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490    tl_assert(isShadowAtom(mce,a1));
491    tl_assert(isShadowAtom(mce,a2));
492    return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
493 }
494 
495 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496    tl_assert(isShadowAtom(mce,a1));
497    tl_assert(isShadowAtom(mce,a2));
498    return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
499 }
500 
501 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502    tl_assert(isShadowAtom(mce,a1));
503    tl_assert(isShadowAtom(mce,a2));
504    return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
505 }
506 
507 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508    tl_assert(isShadowAtom(mce,a1));
509    tl_assert(isShadowAtom(mce,a2));
510    return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
511 }
512 
513 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514    tl_assert(isShadowAtom(mce,a1));
515    tl_assert(isShadowAtom(mce,a2));
516    return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517 }
518 
519 /* --------- Undefined-if-either-undefined --------- */
520 
521 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522    tl_assert(isShadowAtom(mce,a1));
523    tl_assert(isShadowAtom(mce,a2));
524    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
525 }
526 
527 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528    tl_assert(isShadowAtom(mce,a1));
529    tl_assert(isShadowAtom(mce,a2));
530    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
531 }
532 
533 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534    tl_assert(isShadowAtom(mce,a1));
535    tl_assert(isShadowAtom(mce,a2));
536    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
537 }
538 
539 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540    tl_assert(isShadowAtom(mce,a1));
541    tl_assert(isShadowAtom(mce,a2));
542    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
543 }
544 
545 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546    IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547    tl_assert(isShadowAtom(mce,a1));
548    tl_assert(isShadowAtom(mce,a2));
549    tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550    tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551    tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552    tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553    tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554    tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555 
556    return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557 }
558 
559 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
560    tl_assert(isShadowAtom(mce,a1));
561    tl_assert(isShadowAtom(mce,a2));
562    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
563 }
564 
565 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566    tl_assert(isShadowAtom(mce,a1));
567    tl_assert(isShadowAtom(mce,a2));
568    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569 }
570 
571 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
572    switch (vty) {
573       case Ity_I8:   return mkUifU8(mce, a1, a2);
574       case Ity_I16:  return mkUifU16(mce, a1, a2);
575       case Ity_I32:  return mkUifU32(mce, a1, a2);
576       case Ity_I64:  return mkUifU64(mce, a1, a2);
577       case Ity_I128: return mkUifU128(mce, a1, a2);
578       case Ity_V128: return mkUifUV128(mce, a1, a2);
579       default:
580          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581          VG_(tool_panic)("memcheck:mkUifU");
582    }
583 }
584 
585 /* --------- The Left-family of operations. --------- */
586 
587 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588    tl_assert(isShadowAtom(mce,a1));
589    return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
590 }
591 
592 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593    tl_assert(isShadowAtom(mce,a1));
594    return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
595 }
596 
597 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598    tl_assert(isShadowAtom(mce,a1));
599    return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
600 }
601 
602 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603    tl_assert(isShadowAtom(mce,a1));
604    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
605 }
606 
607 /* --------- 'Improvement' functions for AND/OR. --------- */
608 
609 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
610    defined (0); all other -> undefined (1).
611 */
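/* A worked 8-bit example of the AND improvement term (values invented
   for illustration): data = 0x0F, vbits = 0xC0, so bits 7..6 of the
   operand are undefined.

      data | vbits = 0x0F | 0xC0 = 0xCF

   The zero (defined) bits of the result are bits 5..4: exactly the
   positions where data is a defined 0, so the AND's result there is 0
   no matter what the other operand holds. */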
612 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
613 {
614    tl_assert(isOriginalAtom(mce, data));
615    tl_assert(isShadowAtom(mce, vbits));
616    tl_assert(sameKindedAtoms(data, vbits));
617    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
618 }
619 
620 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621 {
622    tl_assert(isOriginalAtom(mce, data));
623    tl_assert(isShadowAtom(mce, vbits));
624    tl_assert(sameKindedAtoms(data, vbits));
625    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
626 }
627 
628 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629 {
630    tl_assert(isOriginalAtom(mce, data));
631    tl_assert(isShadowAtom(mce, vbits));
632    tl_assert(sameKindedAtoms(data, vbits));
633    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
634 }
635 
636 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637 {
638    tl_assert(isOriginalAtom(mce, data));
639    tl_assert(isShadowAtom(mce, vbits));
640    tl_assert(sameKindedAtoms(data, vbits));
641    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
642 }
643 
644 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645 {
646    tl_assert(isOriginalAtom(mce, data));
647    tl_assert(isShadowAtom(mce, vbits));
648    tl_assert(sameKindedAtoms(data, vbits));
649    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
650 }
651 
652 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653 {
654    tl_assert(isOriginalAtom(mce, data));
655    tl_assert(isShadowAtom(mce, vbits));
656    tl_assert(sameKindedAtoms(data, vbits));
657    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658 }
659 
660 /* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
661    defined (0); all other -> undefined (1).
662 */
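/* The mirror-image 8-bit example (again with invented values):
   data = 0xF0, vbits = 0xC0.

      ~data | vbits = 0x0F | 0xC0 = 0xCF

   Here the defined (0) bits are positions 5..4, where data is a
   defined 1, so the OR's result there is 1 regardless of the other
   operand. */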
663 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664 {
665    tl_assert(isOriginalAtom(mce, data));
666    tl_assert(isShadowAtom(mce, vbits));
667    tl_assert(sameKindedAtoms(data, vbits));
668    return assignNew(
669              'V', mce, Ity_I8,
670              binop(Iop_Or8,
671                    assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
672                    vbits) );
673 }
674 
675 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676 {
677    tl_assert(isOriginalAtom(mce, data));
678    tl_assert(isShadowAtom(mce, vbits));
679    tl_assert(sameKindedAtoms(data, vbits));
680    return assignNew(
681              'V', mce, Ity_I16,
682              binop(Iop_Or16,
683                    assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
684                    vbits) );
685 }
686 
687 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688 {
689    tl_assert(isOriginalAtom(mce, data));
690    tl_assert(isShadowAtom(mce, vbits));
691    tl_assert(sameKindedAtoms(data, vbits));
692    return assignNew(
693              'V', mce, Ity_I32,
694              binop(Iop_Or32,
695                    assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
696                    vbits) );
697 }
698 
699 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700 {
701    tl_assert(isOriginalAtom(mce, data));
702    tl_assert(isShadowAtom(mce, vbits));
703    tl_assert(sameKindedAtoms(data, vbits));
704    return assignNew(
705              'V', mce, Ity_I64,
706              binop(Iop_Or64,
707                    assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
708                    vbits) );
709 }
710 
711 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
712 {
713    tl_assert(isOriginalAtom(mce, data));
714    tl_assert(isShadowAtom(mce, vbits));
715    tl_assert(sameKindedAtoms(data, vbits));
716    return assignNew(
717              'V', mce, Ity_V128,
718              binop(Iop_OrV128,
719                    assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
720                    vbits) );
721 }
722 
723 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724 {
725    tl_assert(isOriginalAtom(mce, data));
726    tl_assert(isShadowAtom(mce, vbits));
727    tl_assert(sameKindedAtoms(data, vbits));
728    return assignNew(
729              'V', mce, Ity_V256,
730              binop(Iop_OrV256,
731                    assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732                    vbits) );
733 }
734 
735 /* --------- Pessimising casts. --------- */
736 
737 /* The function returns an expression of type DST_TY. If any of the VBITS
738    is undefined (value == 1) the resulting expression has all bits set to
739    1. Otherwise, all bits are 0. */
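/* For example (I32 source, I64 destination): vbits = 0x00000400, i.e. a
   single undefined bit, pessimises to 0xFFFFFFFFFFFFFFFF, while
   vbits = 0 stays 0.  On that path the code below PCasts the 32-bit
   value with Iop_CmpwNEZ32 and then duplicates it into both halves of
   the 64-bit result. */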
740 
741 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
742 {
743    IRType  src_ty;
744    IRAtom* tmp1;
745 
746    /* Note, dst_ty is a shadow type, not an original type. */
747    tl_assert(isShadowAtom(mce,vbits));
748    src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
749 
750    /* Fast-track some common cases */
751    if (src_ty == Ity_I32 && dst_ty == Ity_I32)
752       return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
753 
754    if (src_ty == Ity_I64 && dst_ty == Ity_I64)
755       return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
756 
757    if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
758       /* PCast the arg, then clone it. */
759       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
760       return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
761    }
762 
763    if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
764       /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
765          the top half. */
766       IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
767       return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
768    }
769 
770    /* Else do it the slow way .. */
771    /* First of all, collapse vbits down to a single bit. */
772    tmp1   = NULL;
773    switch (src_ty) {
774       case Ity_I1:
775          tmp1 = vbits;
776          break;
777       case Ity_I8:
778          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
779          break;
780       case Ity_I16:
781          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
782          break;
783       case Ity_I32:
784          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
785          break;
786       case Ity_I64:
787          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
788          break;
789       case Ity_I128: {
790          /* Gah.  Chop it in half, OR the halves together, and compare
791             that with zero. */
792          IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
793          IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
794          IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
795          tmp1         = assignNew('V', mce, Ity_I1,
796                                        unop(Iop_CmpNEZ64, tmp4));
797          break;
798       }
799       default:
800          ppIRType(src_ty);
801          VG_(tool_panic)("mkPCastTo(1)");
802    }
803    tl_assert(tmp1);
804    /* Now widen up to the dst type. */
805    switch (dst_ty) {
806       case Ity_I1:
807          return tmp1;
808       case Ity_I8:
809          return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
810       case Ity_I16:
811          return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
812       case Ity_I32:
813          return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
814       case Ity_I64:
815          return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
816       case Ity_V128:
817          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
818          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
819          return tmp1;
820       case Ity_I128:
821          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
822          tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
823          return tmp1;
824       default:
825          ppIRType(dst_ty);
826          VG_(tool_panic)("mkPCastTo(2)");
827    }
828 }
829 
830 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831 /*
832    Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833    PCasting to Ity_U1.  However, sometimes it is necessary to be more
834    accurate.  The insight is that the result is defined if two
835    corresponding bits can be found, one from each argument, so that
836    both bits are defined but are different -- that makes EQ say "No"
837    and NE say "Yes".  Hence, we compute an improvement term and DifD
838    it onto the "normal" (UifU) result.
839 
840    The result is:
841 
842    PCastTo<1> (
843       -- naive version
844       PCastTo<sz>( UifU<sz>(vxx, vyy) )
845 
846       `DifD<sz>`
847 
848       -- improvement term
849       PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
850    )
851 
852    where
853      vec contains 0 (defined) bits where the corresponding arg bits
854      are defined but different, and 1 bits otherwise.
855 
856      vec = Or<sz>( vxx,   // 0 iff bit defined
857                    vyy,   // 0 iff bit defined
858                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859                  )
860 
861      If any bit of vec is 0, the result is defined and so the
862      improvement term should produce 0...0, else it should produce
863      1...1.
864 
865      Hence require for the improvement term:
866 
867         if vec == 1...1 then 1...1 else 0...0
868      ->
869         PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870 
871    This was extensively re-analysed and checked on 6 July 05.
872 */
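/* A worked 32-bit example (values invented for illustration):
   xx = 0x00000005, yy = 0x00000004, vxx = 0xFFFF0000 (top half of xx
   undefined), vyy = 0 (yy fully defined).

      naive            = PCast(vxx UifU vyy) = 0xFFFFFFFF
      vec              = vxx | vyy | ~(xx ^ yy) = 0xFFFFFFFE
      improvement_term = PCast(CmpEQ32(vec, 0xFFFFFFFF)) = 0x00000000
      improved         = naive & improvement_term = 0

   Bit 0 of both operands is defined and differs, so the comparison's
   outcome is known even though xx is partly undefined; the improvement
   term rescues the result that the naive UifU rule would have flagged. */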
873 static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
874                                     IRType  ty,
875                                     IRAtom* vxx, IRAtom* vyy,
876                                     IRAtom* xx,  IRAtom* yy )
877 {
878    IRAtom *naive, *vec, *improvement_term;
879    IRAtom *improved, *final_cast, *top;
880    IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
881 
882    tl_assert(isShadowAtom(mce,vxx));
883    tl_assert(isShadowAtom(mce,vyy));
884    tl_assert(isOriginalAtom(mce,xx));
885    tl_assert(isOriginalAtom(mce,yy));
886    tl_assert(sameKindedAtoms(vxx,xx));
887    tl_assert(sameKindedAtoms(vyy,yy));
888 
889    switch (ty) {
890       case Ity_I32:
891          opOR   = Iop_Or32;
892          opDIFD = Iop_And32;
893          opUIFU = Iop_Or32;
894          opNOT  = Iop_Not32;
895          opXOR  = Iop_Xor32;
896          opCMP  = Iop_CmpEQ32;
897          top    = mkU32(0xFFFFFFFF);
898          break;
899       case Ity_I64:
900          opOR   = Iop_Or64;
901          opDIFD = Iop_And64;
902          opUIFU = Iop_Or64;
903          opNOT  = Iop_Not64;
904          opXOR  = Iop_Xor64;
905          opCMP  = Iop_CmpEQ64;
906          top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
907          break;
908       default:
909          VG_(tool_panic)("expensiveCmpEQorNE");
910    }
911 
912    naive
913       = mkPCastTo(mce,ty,
914                   assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
915 
916    vec
917       = assignNew(
918            'V', mce,ty,
919            binop( opOR,
920                   assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
921                   assignNew(
922                      'V', mce,ty,
923                      unop( opNOT,
924                            assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
925 
926    improvement_term
927       = mkPCastTo( mce,ty,
928                    assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
929 
930    improved
931       = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
932 
933    final_cast
934       = mkPCastTo( mce, Ity_I1, improved );
935 
936    return final_cast;
937 }
938 
939 
940 /* --------- Semi-accurate interpretation of CmpORD. --------- */
941 
942 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
943 
944       CmpORD32S(x,y) = 1<<3   if  x <s y
945                      = 1<<2   if  x >s y
946                      = 1<<1   if  x == y
947 
948    and similarly the unsigned variant.  The default interpretation is:
949 
950       CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
951                                   & (7<<1)
952 
953    The "& (7<<1)" reflects the fact that all result bits except 3,2,1
954    are zero and therefore defined (viz, zero).
955 
956    Also deal with a special case better:
957 
958       CmpORD32S(x,0)
959 
960    Here, bit 3 (LT) of the result is a copy of the top bit of x and
961    will be defined even if the rest of x isn't.  In which case we do:
962 
963       CmpORD32S#(x,x#,0,{impliedly 0}#)
964          = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
965            | (x# >>u 31) << 3      -- LT# = x#[31]
966 
967    Analogous handling for CmpORD64{S,U}.
968 */
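/* A worked example of the special case, for CmpORD32S(x,0) with
   x# = 0x0000FFFF (low half of x undefined, sign bit defined):

      PCast(x#) & (3<<1)   = 0xFFFFFFFF & 0x6 = 0x6   -- GT#, EQ#
      (x# >>u 31) << 3     = 0 << 3           = 0x0   -- LT#
      result V bits        = 0x6

   So the GT and EQ bits of the result are reported undefined, but the
   LT bit is defined, because it depends only on the (defined) sign bit
   of x.  Under the default rule the V bits would have been 0xE. */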
969 static Bool isZeroU32 ( IRAtom* e )
970 {
971    return
972       toBool( e->tag == Iex_Const
973               && e->Iex.Const.con->tag == Ico_U32
974               && e->Iex.Const.con->Ico.U32 == 0 );
975 }
976 
977 static Bool isZeroU64 ( IRAtom* e )
978 {
979    return
980       toBool( e->tag == Iex_Const
981               && e->Iex.Const.con->tag == Ico_U64
982               && e->Iex.Const.con->Ico.U64 == 0 );
983 }
984 
985 static IRAtom* doCmpORD ( MCEnv*  mce,
986                           IROp    cmp_op,
987                           IRAtom* xxhash, IRAtom* yyhash,
988                           IRAtom* xx,     IRAtom* yy )
989 {
990    Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
991    Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
992    IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
993    IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
994    IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
995    IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
996    IRType ty     = m64 ? Ity_I64   : Ity_I32;
997    Int    width  = m64 ? 64        : 32;
998 
999    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1000 
1001    IRAtom* threeLeft1 = NULL;
1002    IRAtom* sevenLeft1 = NULL;
1003 
1004    tl_assert(isShadowAtom(mce,xxhash));
1005    tl_assert(isShadowAtom(mce,yyhash));
1006    tl_assert(isOriginalAtom(mce,xx));
1007    tl_assert(isOriginalAtom(mce,yy));
1008    tl_assert(sameKindedAtoms(xxhash,xx));
1009    tl_assert(sameKindedAtoms(yyhash,yy));
1010    tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1011              || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1012 
1013    if (0) {
1014       ppIROp(cmp_op); VG_(printf)(" ");
1015       ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1016    }
1017 
1018    if (syned && isZero(yy)) {
1019       /* fancy interpretation */
1020       /* if yy is zero, then it must be fully defined (zero#). */
1021       tl_assert(isZero(yyhash));
1022       threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1023       return
1024          binop(
1025             opOR,
1026             assignNew(
1027                'V', mce,ty,
1028                binop(
1029                   opAND,
1030                   mkPCastTo(mce,ty, xxhash),
1031                   threeLeft1
1032                )),
1033             assignNew(
1034                'V', mce,ty,
1035                binop(
1036                   opSHL,
1037                   assignNew(
1038                      'V', mce,ty,
1039                      binop(opSHR, xxhash, mkU8(width-1))),
1040                   mkU8(3)
1041                ))
1042 	 );
1043    } else {
1044       /* standard interpretation */
1045       sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1046       return
1047          binop(
1048             opAND,
1049             mkPCastTo( mce,ty,
1050                        mkUifU(mce,ty, xxhash,yyhash)),
1051             sevenLeft1
1052          );
1053    }
1054 }
1055 
1056 
1057 /*------------------------------------------------------------*/
1058 /*--- Emit a test and complaint if something is undefined. ---*/
1059 /*------------------------------------------------------------*/
1060 
1061 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1062 
1063 
1064 /* Set the annotations on a dirty helper to indicate that the stack
1065    pointer and instruction pointers might be read.  This is the
1066    behaviour of all 'emit-a-complaint' style functions we might
1067    call. */
1068 
1069 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1070    di->nFxState = 2;
1071    di->fxState[0].fx        = Ifx_Read;
1072    di->fxState[0].offset    = mce->layout->offset_SP;
1073    di->fxState[0].size      = mce->layout->sizeof_SP;
1074    di->fxState[0].nRepeats  = 0;
1075    di->fxState[0].repeatLen = 0;
1076    di->fxState[1].fx        = Ifx_Read;
1077    di->fxState[1].offset    = mce->layout->offset_IP;
1078    di->fxState[1].size      = mce->layout->sizeof_IP;
1079    di->fxState[1].nRepeats  = 0;
1080    di->fxState[1].repeatLen = 0;
1081 }
1082 
1083 
1084 /* Check the supplied **original** atom for undefinedness, and emit a
1085    complaint if so.  Once that happens, mark it as defined.  This is
1086    possible because the atom is either a tmp or literal.  If it's a
1087    tmp, it will be shadowed by a tmp, and so we can set the shadow to
1088    be defined.  In fact as mentioned above, we will have to allocate a
1089    new tmp to carry the new 'defined' shadow value, and update the
1090    original->tmp mapping accordingly; we cannot simply assign a new
1091    value to an existing shadow tmp as this breaks SSAness -- resulting
1092    in the post-instrumentation sanity checker spluttering in disapproval.
1093 */
1094 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1095 {
1096    IRAtom*  vatom;
1097    IRType   ty;
1098    Int      sz;
1099    IRDirty* di;
1100    IRAtom*  cond;
1101    IRAtom*  origin;
1102    void*    fn;
1103    HChar*   nm;
1104    IRExpr** args;
1105    Int      nargs;
1106 
1107    // Don't do V bit tests if we're not reporting undefined value errors.
1108    if (MC_(clo_mc_level) == 1)
1109       return;
1110 
1111    /* Since the original expression is atomic, there's no duplicated
1112       work generated by making multiple V-expressions for it.  So we
1113       don't really care about the possibility that someone else may
1114       also create a V-interpretation for it. */
1115    tl_assert(isOriginalAtom(mce, atom));
1116    vatom = expr2vbits( mce, atom );
1117    tl_assert(isShadowAtom(mce, vatom));
1118    tl_assert(sameKindedAtoms(atom, vatom));
1119 
1120    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1121 
1122    /* sz is only used for constructing the error message */
1123    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1124 
1125    cond = mkPCastTo( mce, Ity_I1, vatom );
1126    /* cond will be 0 if all defined, and 1 if any not defined. */
1127 
1128    /* Get the origin info for the value we are about to check.  At
1129       least, if we are doing origin tracking.  If not, use a dummy
1130       zero origin. */
1131    if (MC_(clo_mc_level) == 3) {
1132       origin = schemeE( mce, atom );
1133       if (mce->hWordTy == Ity_I64) {
1134          origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1135       }
1136    } else {
1137       origin = NULL;
1138    }
1139 
1140    fn    = NULL;
1141    nm    = NULL;
1142    args  = NULL;
1143    nargs = -1;
1144 
1145    switch (sz) {
1146       case 0:
1147          if (origin) {
1148             fn    = &MC_(helperc_value_check0_fail_w_o);
1149             nm    = "MC_(helperc_value_check0_fail_w_o)";
1150             args  = mkIRExprVec_1(origin);
1151             nargs = 1;
1152          } else {
1153             fn    = &MC_(helperc_value_check0_fail_no_o);
1154             nm    = "MC_(helperc_value_check0_fail_no_o)";
1155             args  = mkIRExprVec_0();
1156             nargs = 0;
1157          }
1158          break;
1159       case 1:
1160          if (origin) {
1161             fn    = &MC_(helperc_value_check1_fail_w_o);
1162             nm    = "MC_(helperc_value_check1_fail_w_o)";
1163             args  = mkIRExprVec_1(origin);
1164             nargs = 1;
1165          } else {
1166             fn    = &MC_(helperc_value_check1_fail_no_o);
1167             nm    = "MC_(helperc_value_check1_fail_no_o)";
1168             args  = mkIRExprVec_0();
1169             nargs = 0;
1170          }
1171          break;
1172       case 4:
1173          if (origin) {
1174             fn    = &MC_(helperc_value_check4_fail_w_o);
1175             nm    = "MC_(helperc_value_check4_fail_w_o)";
1176             args  = mkIRExprVec_1(origin);
1177             nargs = 1;
1178          } else {
1179             fn    = &MC_(helperc_value_check4_fail_no_o);
1180             nm    = "MC_(helperc_value_check4_fail_no_o)";
1181             args  = mkIRExprVec_0();
1182             nargs = 0;
1183          }
1184          break;
1185       case 8:
1186          if (origin) {
1187             fn    = &MC_(helperc_value_check8_fail_w_o);
1188             nm    = "MC_(helperc_value_check8_fail_w_o)";
1189             args  = mkIRExprVec_1(origin);
1190             nargs = 1;
1191          } else {
1192             fn    = &MC_(helperc_value_check8_fail_no_o);
1193             nm    = "MC_(helperc_value_check8_fail_no_o)";
1194             args  = mkIRExprVec_0();
1195             nargs = 0;
1196          }
1197          break;
1198       case 2:
1199       case 16:
1200          if (origin) {
1201             fn    = &MC_(helperc_value_checkN_fail_w_o);
1202             nm    = "MC_(helperc_value_checkN_fail_w_o)";
1203             args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1204             nargs = 2;
1205          } else {
1206             fn    = &MC_(helperc_value_checkN_fail_no_o);
1207             nm    = "MC_(helperc_value_checkN_fail_no_o)";
1208             args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1209             nargs = 1;
1210          }
1211          break;
1212       default:
1213          VG_(tool_panic)("unexpected szB");
1214    }
1215 
1216    tl_assert(fn);
1217    tl_assert(nm);
1218    tl_assert(args);
1219    tl_assert(nargs >= 0 && nargs <= 2);
1220    tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1221               || (MC_(clo_mc_level) == 2 && origin == NULL) );
1222 
1223    di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1224                            VG_(fnptr_to_fnentry)( fn ), args );
1225    di->guard = cond;
1226 
1227    /* If the complaint is to be issued under a guard condition, AND that
1228       guard condition. */
1229    if (guard) {
1230      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1231      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1232      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1233 
1234      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1235    }
1236 
1237    setHelperAnns( mce, di );
1238    stmt( 'V', mce, IRStmt_Dirty(di));
1239 
1240    /* Set the shadow tmp to be defined.  First, update the
1241       orig->shadow tmp mapping to reflect the fact that this shadow is
1242       getting a new value. */
1243    tl_assert(isIRAtom(vatom));
1244    /* sameKindedAtoms ... */
1245    if (vatom->tag == Iex_RdTmp) {
1246       tl_assert(atom->tag == Iex_RdTmp);
1247       newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1248       assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1249                        definedOfType(ty));
1250    }
1251 }
1252 
1253 
1254 /*------------------------------------------------------------*/
1255 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1256 /*------------------------------------------------------------*/
1257 
1258 /* Examine the always-defined sections declared in layout to see if
1259    the (offset,size) section is within one.  Note, it is an error to
1260    partially fall into such a region: (offset,size) should either be
1261    completely in such a region or completely not-in such a region.
1262 */
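/* For example, if layout declares an always-defined area at offset 128
   with size 8 (guest bytes 128..135), then (offset=128, size=4) and
   (offset=132, size=4) are both reported True, (offset=136, size=4)
   is False, and (offset=132, size=8) straddles the boundary and panics. */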
1263 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1264 {
1265    Int minoffD, maxoffD, i;
1266    Int minoff = offset;
1267    Int maxoff = minoff + size - 1;
1268    tl_assert((minoff & ~0xFFFF) == 0);
1269    tl_assert((maxoff & ~0xFFFF) == 0);
1270 
1271    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1272       minoffD = mce->layout->alwaysDefd[i].offset;
1273       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1274       tl_assert((minoffD & ~0xFFFF) == 0);
1275       tl_assert((maxoffD & ~0xFFFF) == 0);
1276 
1277       if (maxoff < minoffD || maxoffD < minoff)
1278          continue; /* no overlap */
1279       if (minoff >= minoffD && maxoff <= maxoffD)
1280          return True; /* completely contained in an always-defd section */
1281 
1282       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1283    }
1284    return False; /* could not find any containing section */
1285 }
1286 
1287 
1288 /* Generate into bb suitable actions to shadow this Put.  If the state
1289    slice is marked 'always defined', do nothing.  Otherwise, write the
1290    supplied V bits to the shadow state.  We can pass in either an
1291    original atom or a V-atom, but not both.  In the former case the
1292    relevant V-bits are then generated from the original.
1293    We assume here that the definedness of GUARD has already been checked.
1294 */
1295 static
1296 void do_shadow_PUT ( MCEnv* mce,  Int offset,
1297                      IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1298 {
1299    IRType ty;
1300 
1301    // Don't do shadow PUTs if we're not doing undefined value checking.
1302    // Their absence lets Vex's optimiser remove all the shadow computation
1303    // that they depend on, which includes GETs of the shadow registers.
1304    if (MC_(clo_mc_level) == 1)
1305       return;
1306 
1307    if (atom) {
1308       tl_assert(!vatom);
1309       tl_assert(isOriginalAtom(mce, atom));
1310       vatom = expr2vbits( mce, atom );
1311    } else {
1312       tl_assert(vatom);
1313       tl_assert(isShadowAtom(mce, vatom));
1314    }
1315 
1316    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1317    tl_assert(ty != Ity_I1);
1318    tl_assert(ty != Ity_I128);
1319    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1320       /* later: no ... */
1321       /* emit code to emit a complaint if any of the vbits are 1. */
1322       /* complainIfUndefined(mce, atom); */
1323    } else {
1324       /* Do a plain shadow Put. */
1325       if (guard) {
1326          /* If the guard expression evaluates to false we simply Put the value
1327             that is already stored in the guest state slot */
1328          IRAtom *cond, *iffalse;
1329 
1330          cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
1331          iffalse = assignNew('V', mce, ty,
1332                              IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1333          vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
1334       }
1335       stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1336    }
1337 }
1338 
1339 
1340 /* Generate into bb suitable actions to shadow this PutI (passed in in
1341    pieces).  This is the indexed analogue of do_shadow_PUT.
1342 */
1343 static
1344 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1345 {
1346    IRAtom* vatom;
1347    IRType  ty, tyS;
1348    Int     arrSize;
1349    IRRegArray* descr = puti->descr;
1350    IRAtom*     ix    = puti->ix;
1351    Int         bias  = puti->bias;
1352    IRAtom*     atom  = puti->data;
1353 
1354    // Don't do shadow PUTIs if we're not doing undefined value checking.
1355    // Their absence lets Vex's optimiser remove all the shadow computation
1356    // that they depend on, which includes GETIs of the shadow registers.
1357    if (MC_(clo_mc_level) == 1)
1358       return;
1359 
1360    tl_assert(isOriginalAtom(mce,atom));
1361    vatom = expr2vbits( mce, atom );
1362    tl_assert(sameKindedAtoms(atom, vatom));
1363    ty   = descr->elemTy;
1364    tyS  = shadowTypeV(ty);
1365    arrSize = descr->nElems * sizeofIRType(ty);
1366    tl_assert(ty != Ity_I1);
1367    tl_assert(isOriginalAtom(mce,ix));
1368    complainIfUndefined(mce, ix, NULL);
1369    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1370       /* later: no ... */
1371       /* emit code to emit a complaint if any of the vbits are 1. */
1372       /* complainIfUndefined(mce, atom); */
1373    } else {
1374       /* Do a cloned version of the Put that refers to the shadow
1375          area. */
1376       IRRegArray* new_descr
1377          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1378                          tyS, descr->nElems);
1379       stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1380    }
1381 }
1382 
1383 
1384 /* Return an expression which contains the V bits corresponding to the
1385    given GET (passed in in pieces).
1386 */
1387 static
1388 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1389 {
1390    IRType tyS = shadowTypeV(ty);
1391    tl_assert(ty != Ity_I1);
1392    tl_assert(ty != Ity_I128);
1393    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1394       /* Always defined, return all zeroes of the relevant type */
1395       return definedOfType(tyS);
1396    } else {
1397       /* return a cloned version of the Get that refers to the shadow
1398          area. */
1399       /* FIXME: this isn't an atom! */
1400       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1401    }
1402 }
1403 
1404 
1405 /* Return an expression which contains the V bits corresponding to the
1406    given GETI (passed in in pieces).
1407 */
1408 static
1409 IRExpr* shadow_GETI ( MCEnv* mce,
1410                       IRRegArray* descr, IRAtom* ix, Int bias )
1411 {
1412    IRType ty   = descr->elemTy;
1413    IRType tyS  = shadowTypeV(ty);
1414    Int arrSize = descr->nElems * sizeofIRType(ty);
1415    tl_assert(ty != Ity_I1);
1416    tl_assert(isOriginalAtom(mce,ix));
1417    complainIfUndefined(mce, ix, NULL);
1418    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1419       /* Always defined, return all zeroes of the relevant type */
1420       return definedOfType(tyS);
1421    } else {
1422       /* return a cloned version of the Get that refers to the shadow
1423          area. */
1424       IRRegArray* new_descr
1425          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1426                          tyS, descr->nElems);
1427       return IRExpr_GetI( new_descr, ix, bias );
1428    }
1429 }
1430 
1431 
1432 /*------------------------------------------------------------*/
1433 /*--- Generating approximations for unknown operations,    ---*/
1434 /*--- using lazy-propagate semantics                       ---*/
1435 /*------------------------------------------------------------*/
1436 
1437 /* Lazy propagation of undefinedness from two values, resulting in the
1438    specified shadow type.
1439 */
1440 static
1441 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1442 {
1443    IRAtom* at;
1444    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1445    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1446    tl_assert(isShadowAtom(mce,va1));
1447    tl_assert(isShadowAtom(mce,va2));
1448 
1449    /* The general case is inefficient because PCast is an expensive
1450       operation.  Here are some special cases which use PCast only
1451       once rather than twice. */
1452 
1453    /* I64 x I64 -> I64 */
1454    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1455       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1456       at = mkUifU(mce, Ity_I64, va1, va2);
1457       at = mkPCastTo(mce, Ity_I64, at);
1458       return at;
1459    }
1460 
1461    /* I64 x I64 -> I32 */
1462    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1463       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1464       at = mkUifU(mce, Ity_I64, va1, va2);
1465       at = mkPCastTo(mce, Ity_I32, at);
1466       return at;
1467    }
1468 
1469    if (0) {
1470       VG_(printf)("mkLazy2 ");
1471       ppIRType(t1);
1472       VG_(printf)("_");
1473       ppIRType(t2);
1474       VG_(printf)("_");
1475       ppIRType(finalVty);
1476       VG_(printf)("\n");
1477    }
1478 
1479    /* General case: force everything via 32-bit intermediaries. */
1480    at = mkPCastTo(mce, Ity_I32, va1);
1481    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1482    at = mkPCastTo(mce, finalVty, at);
1483    return at;
1484 }
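
/* Worked example (illustrative): if va1 and va2 are both I64 shadows
   and the result shadow type is I32, the special case above yields

      PCastTo-I32( UifU64(va1, va2) )

   i.e. a single PCast rather than one per operand; the result is
   all-ones (fully undefined) iff any bit of either operand's shadow
   is undefined. */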
1485 
1486 
1487 /* 3-arg version of the above. */
1488 static
1489 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1490                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1491 {
1492    IRAtom* at;
1493    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1494    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1495    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1496    tl_assert(isShadowAtom(mce,va1));
1497    tl_assert(isShadowAtom(mce,va2));
1498    tl_assert(isShadowAtom(mce,va3));
1499 
1500    /* The general case is inefficient because PCast is an expensive
1501       operation.  Here are some special cases which use PCast only
1502       twice rather than three times. */
1503 
1504    /* I32 x I64 x I64 -> I64 */
1505    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1506    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1507        && finalVty == Ity_I64) {
1508       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1509       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1510          mode indication which is fully defined, this should get
1511          folded out later. */
1512       at = mkPCastTo(mce, Ity_I64, va1);
1513       /* Now fold in 2nd and 3rd args. */
1514       at = mkUifU(mce, Ity_I64, at, va2);
1515       at = mkUifU(mce, Ity_I64, at, va3);
1516       /* and PCast once again. */
1517       at = mkPCastTo(mce, Ity_I64, at);
1518       return at;
1519    }
1520 
1521    /* I32 x I64 x I64 -> I32 */
1522    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1523        && finalVty == Ity_I32) {
1524       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1525       at = mkPCastTo(mce, Ity_I64, va1);
1526       at = mkUifU(mce, Ity_I64, at, va2);
1527       at = mkUifU(mce, Ity_I64, at, va3);
1528       at = mkPCastTo(mce, Ity_I32, at);
1529       return at;
1530    }
1531 
1532    /* I32 x I32 x I32 -> I32 */
1533    /* 32-bit FP idiom, as (eg) happens on ARM */
1534    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1535        && finalVty == Ity_I32) {
1536       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1537       at = va1;
1538       at = mkUifU(mce, Ity_I32, at, va2);
1539       at = mkUifU(mce, Ity_I32, at, va3);
1540       at = mkPCastTo(mce, Ity_I32, at);
1541       return at;
1542    }
1543 
1544    /* I32 x I128 x I128 -> I128 */
1545    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1546    if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1547        && finalVty == Ity_I128) {
1548       if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1549       /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1550          mode indication which is fully defined, this should get
1551          folded out later. */
1552       at = mkPCastTo(mce, Ity_I128, va1);
1553       /* Now fold in 2nd and 3rd args. */
1554       at = mkUifU(mce, Ity_I128, at, va2);
1555       at = mkUifU(mce, Ity_I128, at, va3);
1556       /* and PCast once again. */
1557       at = mkPCastTo(mce, Ity_I128, at);
1558       return at;
1559    }
1560    if (1) {
1561       VG_(printf)("mkLazy3: ");
1562       ppIRType(t1);
1563       VG_(printf)(" x ");
1564       ppIRType(t2);
1565       VG_(printf)(" x ");
1566       ppIRType(t3);
1567       VG_(printf)(" -> ");
1568       ppIRType(finalVty);
1569       VG_(printf)("\n");
1570    }
1571 
1572    tl_assert(0);
1573    /* General case: force everything via 32-bit intermediaries. */
1574    /*
1575    at = mkPCastTo(mce, Ity_I32, va1);
1576    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1577    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1578    at = mkPCastTo(mce, finalVty, at);
1579    return at;
1580    */
1581 }
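
/* Worked example (illustrative): Iop_AddF64 has the shape
   I32(rounding mode) x F64 x F64 -> F64, so its shadows arrive here
   as I32 x I64 x I64 with finalVty == Ity_I64, matching the first
   special case above.  The rounding-mode shadow is normally a
   defined constant, so the PCastTo applied to it is all-zeroes and
   is folded away by the post-instrumentation optimiser. */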
1582 
1583 
1584 /* 4-arg version of the above. */
1585 static
1586 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1587                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1588 {
1589    IRAtom* at;
1590    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1591    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1592    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1593    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1594    tl_assert(isShadowAtom(mce,va1));
1595    tl_assert(isShadowAtom(mce,va2));
1596    tl_assert(isShadowAtom(mce,va3));
1597    tl_assert(isShadowAtom(mce,va4));
1598 
1599    /* The general case is inefficient because PCast is an expensive
1600       operation.  Here are some special cases which use PCast at most
1601       twice rather than once per argument. */
1602 
1603    /* I32 x I64 x I64 x I64 -> I64 */
1604    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1605    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1606        && finalVty == Ity_I64) {
1607       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1608       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1609          mode indication which is fully defined, this should get
1610          folded out later. */
1611       at = mkPCastTo(mce, Ity_I64, va1);
1612       /* Now fold in 2nd, 3rd, 4th args. */
1613       at = mkUifU(mce, Ity_I64, at, va2);
1614       at = mkUifU(mce, Ity_I64, at, va3);
1615       at = mkUifU(mce, Ity_I64, at, va4);
1616       /* and PCast once again. */
1617       at = mkPCastTo(mce, Ity_I64, at);
1618       return at;
1619    }
1620    /* I32 x I32 x I32 x I32 -> I32 */
1621    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1622    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1623        && finalVty == Ity_I32) {
1624       if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1625       at = va1;
1626       /* Now fold in 2nd, 3rd, 4th args. */
1627       at = mkUifU(mce, Ity_I32, at, va2);
1628       at = mkUifU(mce, Ity_I32, at, va3);
1629       at = mkUifU(mce, Ity_I32, at, va4);
1630       at = mkPCastTo(mce, Ity_I32, at);
1631       return at;
1632    }
1633 
1634    if (1) {
1635       VG_(printf)("mkLazy4: ");
1636       ppIRType(t1);
1637       VG_(printf)(" x ");
1638       ppIRType(t2);
1639       VG_(printf)(" x ");
1640       ppIRType(t3);
1641       VG_(printf)(" x ");
1642       ppIRType(t4);
1643       VG_(printf)(" -> ");
1644       ppIRType(finalVty);
1645       VG_(printf)("\n");
1646    }
1647 
1648    tl_assert(0);
1649 }
1650 
1651 
1652 /* Do the lazy propagation game from a null-terminated vector of
1653    atoms.  These are presumably the arguments to a helper call, so the
1654    IRCallee info is also supplied in order that we can know which
1655    arguments should be ignored (via the .mcx_mask field).
1656 */
1657 static
1658 IRAtom* mkLazyN ( MCEnv* mce,
1659                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1660 {
1661    Int     i;
1662    IRAtom* here;
1663    IRAtom* curr;
1664    IRType  mergeTy;
1665    Bool    mergeTy64 = True;
1666 
1667    /* Decide on the type of the merge intermediary.  If all relevant
1668       args are I64, then it's I64.  In all other circumstances, use
1669       I32. */
1670    for (i = 0; exprvec[i]; i++) {
1671       tl_assert(i < 32);
1672       tl_assert(isOriginalAtom(mce, exprvec[i]));
1673       if (cee->mcx_mask & (1<<i))
1674          continue;
1675       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1676          mergeTy64 = False;
1677    }
1678 
1679    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1680    curr    = definedOfType(mergeTy);
1681 
1682    for (i = 0; exprvec[i]; i++) {
1683       tl_assert(i < 32);
1684       tl_assert(isOriginalAtom(mce, exprvec[i]));
1685       /* Only take notice of this arg if the callee's mc-exclusion
1686          mask does not say it is to be excluded. */
1687       if (cee->mcx_mask & (1<<i)) {
1688          /* the arg is to be excluded from definedness checking.  Do
1689             nothing. */
1690          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1691       } else {
1692          /* calculate the arg's definedness, and pessimistically merge
1693             it in. */
1694          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1695          curr = mergeTy64
1696                    ? mkUifU64(mce, here, curr)
1697                    : mkUifU32(mce, here, curr);
1698       }
1699    }
1700    return mkPCastTo(mce, finalVtype, curr );
1701 }
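
/* Example (hypothetical helper, for illustration only): a call
   f(state_ptr, x, y) whose IRCallee has mcx_mask == 1 excludes arg 0
   (the state pointer) from checking, so the loops above compute,
   schematically,

      PCastTo( finalVtype,
               UifU( PCastTo(mergeTy, x#), PCastTo(mergeTy, y#) ) )

   with mergeTy == Ity_I64 if both x and y are I64, else Ity_I32. */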
1702 
1703 
1704 /*------------------------------------------------------------*/
1705 /*--- Generating expensive sequences for exact carry-chain ---*/
1706 /*--- propagation in add/sub and related operations.       ---*/
1707 /*------------------------------------------------------------*/
1708 
1709 static
1710 IRAtom* expensiveAddSub ( MCEnv*  mce,
1711                           Bool    add,
1712                           IRType  ty,
1713                           IRAtom* qaa, IRAtom* qbb,
1714                           IRAtom* aa,  IRAtom* bb )
1715 {
1716    IRAtom *a_min, *b_min, *a_max, *b_max;
1717    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1718 
1719    tl_assert(isShadowAtom(mce,qaa));
1720    tl_assert(isShadowAtom(mce,qbb));
1721    tl_assert(isOriginalAtom(mce,aa));
1722    tl_assert(isOriginalAtom(mce,bb));
1723    tl_assert(sameKindedAtoms(qaa,aa));
1724    tl_assert(sameKindedAtoms(qbb,bb));
1725 
1726    switch (ty) {
1727       case Ity_I32:
1728          opAND = Iop_And32;
1729          opOR  = Iop_Or32;
1730          opXOR = Iop_Xor32;
1731          opNOT = Iop_Not32;
1732          opADD = Iop_Add32;
1733          opSUB = Iop_Sub32;
1734          break;
1735       case Ity_I64:
1736          opAND = Iop_And64;
1737          opOR  = Iop_Or64;
1738          opXOR = Iop_Xor64;
1739          opNOT = Iop_Not64;
1740          opADD = Iop_Add64;
1741          opSUB = Iop_Sub64;
1742          break;
1743       default:
1744          VG_(tool_panic)("expensiveAddSub");
1745    }
1746 
1747    // a_min = aa & ~qaa
1748    a_min = assignNew('V', mce,ty,
1749                      binop(opAND, aa,
1750                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
1751 
1752    // b_min = bb & ~qbb
1753    b_min = assignNew('V', mce,ty,
1754                      binop(opAND, bb,
1755                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
1756 
1757    // a_max = aa | qaa
1758    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1759 
1760    // b_max = bb | qbb
1761    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1762 
1763    if (add) {
1764       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1765       return
1766       assignNew('V', mce,ty,
1767          binop( opOR,
1768                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1769                 assignNew('V', mce,ty,
1770                    binop( opXOR,
1771                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1772                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1773                    )
1774                 )
1775          )
1776       );
1777    } else {
1778       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1779       return
1780       assignNew('V', mce,ty,
1781          binop( opOR,
1782                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1783                 assignNew('V', mce,ty,
1784                    binop( opXOR,
1785                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1786                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1787                    )
1788                 )
1789          )
1790       );
1791    }
1792 
1793 }
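
/* Worked example (4 bits, for illustration): take the 'add' case with
   aa = 0011, qaa = 0100 (bit 2 undefined), bb = 0001, qbb = 0000
   (fully defined).  Then

      a_min = 0011   a_max = 0111   b_min = b_max = 0001
      a_min + b_min = 0100
      a_max + b_max = 1000
      xor           = 1100

   so bits 2 and 3 of the result are flagged undefined -- bit 2 because
   the corresponding input bit is unknown, bit 3 because the carry out
   of bit 2 depends on it -- while bits 0 and 1 remain defined.
   Or'ing in (qaa | qbb) = 0100 adds nothing further in this case. */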
1794 
1795 
1796 /*------------------------------------------------------------*/
1797 /*--- Scalar shifts.                                       ---*/
1798 /*------------------------------------------------------------*/
1799 
1800 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1801    idea is to shift the definedness bits by the original shift amount.
1802    This introduces 0s ("defined") in new positions for left shifts and
1803    unsigned right shifts, and copies the top definedness bit for
1804    signed right shifts.  So, conveniently, applying the original shift
1805    operator to the definedness bits for the left arg is exactly the
1806    right thing to do:
1807 
1808       (qaa << bb)
1809 
1810    However if the shift amount is undefined then the whole result
1811    is undefined.  Hence need:
1812 
1813       (qaa << bb) `UifU` PCast(qbb)
1814 
1815    If the shift amount bb is a literal then qbb will say 'all defined'
1816    and the UifU and PCast will get folded out by post-instrumentation
1817    optimisation.
1818 */
1819 static IRAtom* scalarShift ( MCEnv*  mce,
1820                              IRType  ty,
1821                              IROp    original_op,
1822                              IRAtom* qaa, IRAtom* qbb,
1823                              IRAtom* aa,  IRAtom* bb )
1824 {
1825    tl_assert(isShadowAtom(mce,qaa));
1826    tl_assert(isShadowAtom(mce,qbb));
1827    tl_assert(isOriginalAtom(mce,aa));
1828    tl_assert(isOriginalAtom(mce,bb));
1829    tl_assert(sameKindedAtoms(qaa,aa));
1830    tl_assert(sameKindedAtoms(qbb,bb));
1831    return
1832       assignNew(
1833          'V', mce, ty,
1834          mkUifU( mce, ty,
1835                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1836                  mkPCastTo(mce, ty, qbb)
1837          )
1838    );
1839 }
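
/* Worked example (illustrative): shifting aa left by the literal 2
   when only bit 0 of aa is undefined (qaa = ...0001) gives
   qaa << 2 = ...0100: the undefinedness moves to bit 2 along with the
   data, bits 0 and 1 become defined (zeroes are shifted in), and the
   literal shift amount has an all-zero shadow, so the UifU/PCast term
   contributes nothing and is optimised away. */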
1840 
1841 
1842 /*------------------------------------------------------------*/
1843 /*--- Helpers for dealing with vector primops.             ---*/
1844 /*------------------------------------------------------------*/
1845 
1846 /* Vector pessimisation -- pessimise within each lane individually. */
1847 
1848 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1849 {
1850    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1851 }
1852 
1853 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1854 {
1855    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1856 }
1857 
1858 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1859 {
1860    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1861 }
1862 
1863 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1864 {
1865    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1866 }
1867 
1868 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1869 {
1870    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1871 }
1872 
1873 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
1874 {
1875    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
1876 }
1877 
1878 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1879 {
1880    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1881 }
1882 
1883 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1884 {
1885    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1886 }
1887 
1888 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1889 {
1890    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1891 }
1892 
1893 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1894 {
1895    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1896 }
1897 
1898 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1899 {
1900    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1901 }
1902 
1903 
1904 /* Here's a simple scheme capable of handling ops derived from SSE1
1905    code while only generating ops that can be efficiently
1906    implemented in SSE1. */
1907 
1908 /* All-lanes versions are straightforward:
1909 
1910    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1911 
1912    unary32Fx4(x)      ==> PCast32x4(x#)
1913 
1914    Lowest-lane-only versions are more complex:
1915 
1916    binary32F0x4(x,y)  ==> SetV128lo32(
1917                              x#,
1918                              PCast32(V128to32(UifUV128(x#,y#)))
1919                           )
1920 
1921    This is perhaps not so obvious.  In particular, it's faster to
1922    do a V128-bit UifU and then take the bottom 32 bits than the more
1923    obvious scheme of taking the bottom 32 bits of each operand
1924    and doing a 32-bit UifU.  Basically since UifU is fast and
1925    chopping lanes off vector values is slow.
1926 
1927    Finally:
1928 
1929    unary32F0x4(x)     ==> SetV128lo32(
1930                              x#,
1931                              PCast32(V128to32(x#))
1932                           )
1933 
1934    Where:
1935 
1936    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1937    PCast32x4(v#) = CmpNEZ32x4(v#)
1938 */
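
/* Concrete case (illustrative): for a binary 32Fx4 op handled by
   binary32Fx4 below, the emitted shadow computation is

      CmpNEZ32x4( UifUV128(x#, y#) )

   so a single undefined bit anywhere in a 32-bit lane of either
   operand marks all 32 bits of that result lane undefined, while
   fully-defined lanes remain defined. */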
1939 
1940 static
1941 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1942 {
1943    IRAtom* at;
1944    tl_assert(isShadowAtom(mce, vatomX));
1945    tl_assert(isShadowAtom(mce, vatomY));
1946    at = mkUifUV128(mce, vatomX, vatomY);
1947    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1948    return at;
1949 }
1950 
1951 static
1952 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1953 {
1954    IRAtom* at;
1955    tl_assert(isShadowAtom(mce, vatomX));
1956    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1957    return at;
1958 }
1959 
1960 static
1961 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1962 {
1963    IRAtom* at;
1964    tl_assert(isShadowAtom(mce, vatomX));
1965    tl_assert(isShadowAtom(mce, vatomY));
1966    at = mkUifUV128(mce, vatomX, vatomY);
1967    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1968    at = mkPCastTo(mce, Ity_I32, at);
1969    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1970    return at;
1971 }
1972 
1973 static
1974 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1975 {
1976    IRAtom* at;
1977    tl_assert(isShadowAtom(mce, vatomX));
1978    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1979    at = mkPCastTo(mce, Ity_I32, at);
1980    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1981    return at;
1982 }
1983 
1984 /* --- ... and ... 64Fx2 versions of the same ... --- */
1985 
1986 static
1987 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1988 {
1989    IRAtom* at;
1990    tl_assert(isShadowAtom(mce, vatomX));
1991    tl_assert(isShadowAtom(mce, vatomY));
1992    at = mkUifUV128(mce, vatomX, vatomY);
1993    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1994    return at;
1995 }
1996 
1997 static
1998 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1999 {
2000    IRAtom* at;
2001    tl_assert(isShadowAtom(mce, vatomX));
2002    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2003    return at;
2004 }
2005 
2006 static
2007 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2008 {
2009    IRAtom* at;
2010    tl_assert(isShadowAtom(mce, vatomX));
2011    tl_assert(isShadowAtom(mce, vatomY));
2012    at = mkUifUV128(mce, vatomX, vatomY);
2013    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2014    at = mkPCastTo(mce, Ity_I64, at);
2015    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2016    return at;
2017 }
2018 
2019 static
2020 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2021 {
2022    IRAtom* at;
2023    tl_assert(isShadowAtom(mce, vatomX));
2024    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2025    at = mkPCastTo(mce, Ity_I64, at);
2026    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2027    return at;
2028 }
2029 
2030 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2031 
2032 static
2033 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2034 {
2035    IRAtom* at;
2036    tl_assert(isShadowAtom(mce, vatomX));
2037    tl_assert(isShadowAtom(mce, vatomY));
2038    at = mkUifU64(mce, vatomX, vatomY);
2039    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2040    return at;
2041 }
2042 
2043 static
2044 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2045 {
2046    IRAtom* at;
2047    tl_assert(isShadowAtom(mce, vatomX));
2048    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2049    return at;
2050 }
2051 
2052 /* --- ... and ... 64Fx4 versions of the same ... --- */
2053 
2054 static
2055 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2056 {
2057    IRAtom* at;
2058    tl_assert(isShadowAtom(mce, vatomX));
2059    tl_assert(isShadowAtom(mce, vatomY));
2060    at = mkUifUV256(mce, vatomX, vatomY);
2061    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2062    return at;
2063 }
2064 
2065 static
2066 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2067 {
2068    IRAtom* at;
2069    tl_assert(isShadowAtom(mce, vatomX));
2070    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2071    return at;
2072 }
2073 
2074 /* --- ... and ... 32Fx8 versions of the same ... --- */
2075 
2076 static
2077 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2078 {
2079    IRAtom* at;
2080    tl_assert(isShadowAtom(mce, vatomX));
2081    tl_assert(isShadowAtom(mce, vatomY));
2082    at = mkUifUV256(mce, vatomX, vatomY);
2083    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2084    return at;
2085 }
2086 
2087 static
2088 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2089 {
2090    IRAtom* at;
2091    tl_assert(isShadowAtom(mce, vatomX));
2092    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2093    return at;
2094 }
2095 
2096 /* --- --- Vector saturated narrowing --- --- */
2097 
2098 /* We used to do something very clever here, but on closer inspection
2099    (2011-Jun-15), and in particular bug #279698, it turns out to be
2100    wrong.  Part of the problem came from the fact that for a long
2101    time, the IR primops to do with saturated narrowing were
2102    underspecified and managed to confuse multiple cases which needed
2103    to be separate: the op names had a signedness qualifier, but in
2104    fact the source and destination signednesses needed to be specified
2105    independently, so the op names really need two independent
2106    signedness specifiers.
2107 
2108    As of 2011-Jun-15 (ish) the underspecification was sorted out
2109    properly.  The incorrect instrumentation remained, though.  That
2110    has now (2011-Oct-22) been fixed.
2111 
2112    What we now do is simple:
2113 
2114    Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2115    number of lanes, X is the source lane width and signedness, and Y
2116    is the destination lane width and signedness.  In all cases the
2117    destination lane width is half the source lane width, so the names
2118    have a bit of redundancy, but are at least easy to read.
2119 
2120    For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2121    to unsigned 16s.
2122 
2123    Let Vanilla(OP) be a function that takes OP, one of these
2124    saturating narrowing ops, and produces the same "shaped" narrowing
2125    op which is not saturating, but merely dumps the most significant
2126    bits.  "same shape" means that the lane numbers and widths are the
2127    same as with OP.
2128 
2129    For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2130                   = Iop_NarrowBin32to16x8,
2131    that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2132    dumping the top half of each lane.
2133 
2134    So, with that in place, the scheme is simple: pessimise each lane
2135    individually and then apply Vanilla(OP) so as to get the result in
2136    the right "shape".  If the original OP is
2137    QNarrowBinXtoYxZ then we produce
2138 
2139    Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2140 
2141    or for the case when OP is unary (Iop_QNarrowUn*)
2142 
2143    Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2144 */
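
/* Concrete case (illustrative): for OP = Iop_QNarrowBin32Sto16Ux8,
   Vanilla(OP) = Iop_NarrowBin32to16x8 and the per-lane pessimisation
   is CmpNEZ32x4, so vectorNarrowBinV128 below emits

      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) )

   Each source lane containing any undefined bit becomes all-ones, and
   the non-saturating narrow keeps its low 16 bits, giving an all-ones
   (fully undefined) result lane; fully-defined lanes stay defined. */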
2145 static
2146 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2147 {
2148    switch (qnarrowOp) {
2149       /* Binary: (128, 128) -> 128 */
2150       case Iop_QNarrowBin16Sto8Ux16:
2151       case Iop_QNarrowBin16Sto8Sx16:
2152       case Iop_QNarrowBin16Uto8Ux16:
2153          return Iop_NarrowBin16to8x16;
2154       case Iop_QNarrowBin32Sto16Ux8:
2155       case Iop_QNarrowBin32Sto16Sx8:
2156       case Iop_QNarrowBin32Uto16Ux8:
2157          return Iop_NarrowBin32to16x8;
2158       /* Binary: (64, 64) -> 64 */
2159       case Iop_QNarrowBin32Sto16Sx4:
2160          return Iop_NarrowBin32to16x4;
2161       case Iop_QNarrowBin16Sto8Ux8:
2162       case Iop_QNarrowBin16Sto8Sx8:
2163          return Iop_NarrowBin16to8x8;
2164       /* Unary: 128 -> 64 */
2165       case Iop_QNarrowUn64Uto32Ux2:
2166       case Iop_QNarrowUn64Sto32Sx2:
2167       case Iop_QNarrowUn64Sto32Ux2:
2168          return Iop_NarrowUn64to32x2;
2169       case Iop_QNarrowUn32Uto16Ux4:
2170       case Iop_QNarrowUn32Sto16Sx4:
2171       case Iop_QNarrowUn32Sto16Ux4:
2172          return Iop_NarrowUn32to16x4;
2173       case Iop_QNarrowUn16Uto8Ux8:
2174       case Iop_QNarrowUn16Sto8Sx8:
2175       case Iop_QNarrowUn16Sto8Ux8:
2176          return Iop_NarrowUn16to8x8;
2177       default:
2178          ppIROp(qnarrowOp);
2179          VG_(tool_panic)("vanillaNarrowOpOfShape");
2180    }
2181 }
2182 
2183 static
2184 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2185                               IRAtom* vatom1, IRAtom* vatom2)
2186 {
2187    IRAtom *at1, *at2, *at3;
2188    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2189    switch (narrow_op) {
2190       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2191       case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2192       case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2193       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2194       case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2195       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2196       default: VG_(tool_panic)("vectorNarrowBinV128");
2197    }
2198    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2199    tl_assert(isShadowAtom(mce,vatom1));
2200    tl_assert(isShadowAtom(mce,vatom2));
2201    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2202    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2203    at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2204    return at3;
2205 }
2206 
2207 static
2208 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2209                             IRAtom* vatom1, IRAtom* vatom2)
2210 {
2211    IRAtom *at1, *at2, *at3;
2212    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2213    switch (narrow_op) {
2214       case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2215       case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2216       case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2217       default: VG_(tool_panic)("vectorNarrowBin64");
2218    }
2219    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2220    tl_assert(isShadowAtom(mce,vatom1));
2221    tl_assert(isShadowAtom(mce,vatom2));
2222    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2223    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2224    at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2225    return at3;
2226 }
2227 
2228 static
2229 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2230                              IRAtom* vatom1)
2231 {
2232    IRAtom *at1, *at2;
2233    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2234    tl_assert(isShadowAtom(mce,vatom1));
2235    /* For vanilla narrowing (non-saturating), we can just apply
2236       the op directly to the V bits. */
2237    switch (narrow_op) {
2238       case Iop_NarrowUn16to8x8:
2239       case Iop_NarrowUn32to16x4:
2240       case Iop_NarrowUn64to32x2:
2241          at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2242          return at1;
2243       default:
2244          break; /* Do Plan B */
2245    }
2246    /* Plan B: for ops that involve a saturation operation on the args,
2247       we must PCast before the vanilla narrow. */
2248    switch (narrow_op) {
2249       case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2250       case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2251       case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2252       case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2253       case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2254       case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2255       case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2256       case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2257       case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2258       default: VG_(tool_panic)("vectorNarrowUnV128");
2259    }
2260    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2261    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2262    at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2263    return at2;
2264 }
2265 
2266 static
2267 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2268                          IRAtom* vatom1)
2269 {
2270    IRAtom *at1, *at2;
2271    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2272    switch (longen_op) {
2273       case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2274       case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2275       case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2276       case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2277       case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2278       case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2279       default: VG_(tool_panic)("vectorWidenI64");
2280    }
2281    tl_assert(isShadowAtom(mce,vatom1));
2282    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2283    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2284    return at2;
2285 }
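
/* Illustrative: for Iop_Widen8Uto16x8 the widening op is applied to
   the V bits themselves, placing each 8-bit shadow lane in a 16-bit
   lane, and the result is then pessimised with CmpNEZ16x8, so every
   widened lane ends up either all-defined or all-undefined.  The same
   reasoning covers the sign-extending variants, since the per-lane
   PCast swallows whatever the extension did to the shadow bits. */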
2286 
2287 
2288 /* --- --- Vector integer arithmetic --- --- */
2289 
2290 /* Simple ... UifU the args and per-lane pessimise the results. */
2291 
2292 /* --- V128-bit versions --- */
2293 
2294 static
2295 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2296 {
2297    IRAtom* at;
2298    at = mkUifUV128(mce, vatom1, vatom2);
2299    at = mkPCast8x16(mce, at);
2300    return at;
2301 }
2302 
2303 static
2304 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2305 {
2306    IRAtom* at;
2307    at = mkUifUV128(mce, vatom1, vatom2);
2308    at = mkPCast16x8(mce, at);
2309    return at;
2310 }
2311 
2312 static
2313 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2314 {
2315    IRAtom* at;
2316    at = mkUifUV128(mce, vatom1, vatom2);
2317    at = mkPCast32x4(mce, at);
2318    return at;
2319 }
2320 
2321 static
2322 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2323 {
2324    IRAtom* at;
2325    at = mkUifUV128(mce, vatom1, vatom2);
2326    at = mkPCast64x2(mce, at);
2327    return at;
2328 }
2329 
2330 /* --- 64-bit versions --- */
2331 
2332 static
2333 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2334 {
2335    IRAtom* at;
2336    at = mkUifU64(mce, vatom1, vatom2);
2337    at = mkPCast8x8(mce, at);
2338    return at;
2339 }
2340 
2341 static
2342 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2343 {
2344    IRAtom* at;
2345    at = mkUifU64(mce, vatom1, vatom2);
2346    at = mkPCast16x4(mce, at);
2347    return at;
2348 }
2349 
2350 static
2351 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2352 {
2353    IRAtom* at;
2354    at = mkUifU64(mce, vatom1, vatom2);
2355    at = mkPCast32x2(mce, at);
2356    return at;
2357 }
2358 
2359 static
2360 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2361 {
2362    IRAtom* at;
2363    at = mkUifU64(mce, vatom1, vatom2);
2364    at = mkPCastTo(mce, Ity_I64, at);
2365    return at;
2366 }
2367 
2368 /* --- 32-bit versions --- */
2369 
2370 static
2371 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2372 {
2373    IRAtom* at;
2374    at = mkUifU32(mce, vatom1, vatom2);
2375    at = mkPCast8x4(mce, at);
2376    return at;
2377 }
2378 
2379 static
2380 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2381 {
2382    IRAtom* at;
2383    at = mkUifU32(mce, vatom1, vatom2);
2384    at = mkPCast16x2(mce, at);
2385    return at;
2386 }
2387 
2388 
2389 /*------------------------------------------------------------*/
2390 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
2391 /*------------------------------------------------------------*/
2392 
2393 static
2394 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2395                          IROp op,
2396                          IRAtom* atom1, IRAtom* atom2,
2397                          IRAtom* atom3, IRAtom* atom4 )
2398 {
2399    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2400    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2401    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2402    IRAtom* vatom4 = expr2vbits( mce, atom4 );
2403 
2404    tl_assert(isOriginalAtom(mce,atom1));
2405    tl_assert(isOriginalAtom(mce,atom2));
2406    tl_assert(isOriginalAtom(mce,atom3));
2407    tl_assert(isOriginalAtom(mce,atom4));
2408    tl_assert(isShadowAtom(mce,vatom1));
2409    tl_assert(isShadowAtom(mce,vatom2));
2410    tl_assert(isShadowAtom(mce,vatom3));
2411    tl_assert(isShadowAtom(mce,vatom4));
2412    tl_assert(sameKindedAtoms(atom1,vatom1));
2413    tl_assert(sameKindedAtoms(atom2,vatom2));
2414    tl_assert(sameKindedAtoms(atom3,vatom3));
2415    tl_assert(sameKindedAtoms(atom4,vatom4));
2416    switch (op) {
2417       case Iop_MAddF64:
2418       case Iop_MAddF64r32:
2419       case Iop_MSubF64:
2420       case Iop_MSubF64r32:
2421          /* I32(rm) x F64 x F64 x F64 -> F64 */
2422          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2423 
2424       case Iop_MAddF32:
2425       case Iop_MSubF32:
2426          /* I32(rm) x F32 x F32 x F32 -> F32 */
2427          return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2428 
2429       /* V256-bit data-steering */
2430       case Iop_64x4toV256:
2431          return assignNew('V', mce, Ity_V256,
2432                           IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2433 
2434       default:
2435          ppIROp(op);
2436          VG_(tool_panic)("memcheck:expr2vbits_Qop");
2437    }
2438 }
2439 
2440 
2441 static
2442 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2443                            IROp op,
2444                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2445 {
2446    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2447    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2448    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2449 
2450    tl_assert(isOriginalAtom(mce,atom1));
2451    tl_assert(isOriginalAtom(mce,atom2));
2452    tl_assert(isOriginalAtom(mce,atom3));
2453    tl_assert(isShadowAtom(mce,vatom1));
2454    tl_assert(isShadowAtom(mce,vatom2));
2455    tl_assert(isShadowAtom(mce,vatom3));
2456    tl_assert(sameKindedAtoms(atom1,vatom1));
2457    tl_assert(sameKindedAtoms(atom2,vatom2));
2458    tl_assert(sameKindedAtoms(atom3,vatom3));
2459    switch (op) {
2460       case Iop_AddF128:
2461       case Iop_AddD128:
2462       case Iop_SubF128:
2463       case Iop_SubD128:
2464       case Iop_MulF128:
2465       case Iop_MulD128:
2466       case Iop_DivF128:
2467       case Iop_DivD128:
2468       case Iop_QuantizeD128:
2469          /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2470          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2471       case Iop_AddF64:
2472       case Iop_AddD64:
2473       case Iop_AddF64r32:
2474       case Iop_SubF64:
2475       case Iop_SubD64:
2476       case Iop_SubF64r32:
2477       case Iop_MulF64:
2478       case Iop_MulD64:
2479       case Iop_MulF64r32:
2480       case Iop_DivF64:
2481       case Iop_DivD64:
2482       case Iop_DivF64r32:
2483       case Iop_ScaleF64:
2484       case Iop_Yl2xF64:
2485       case Iop_Yl2xp1F64:
2486       case Iop_AtanF64:
2487       case Iop_PRemF64:
2488       case Iop_PRem1F64:
2489       case Iop_QuantizeD64:
2490          /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2491          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2492       case Iop_PRemC3210F64:
2493       case Iop_PRem1C3210F64:
2494          /* I32(rm) x F64 x F64 -> I32 */
2495          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2496       case Iop_AddF32:
2497       case Iop_SubF32:
2498       case Iop_MulF32:
2499       case Iop_DivF32:
2500          /* I32(rm) x F32 x F32 -> F32 */
2501          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2502       case Iop_SignificanceRoundD64:
2503          /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
2504          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2505       case Iop_SignificanceRoundD128:
2506          /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
2507          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2508       case Iop_ExtractV128:
2509          complainIfUndefined(mce, atom3, NULL);
2510          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2511       case Iop_Extract64:
2512          complainIfUndefined(mce, atom3, NULL);
2513          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2514       case Iop_SetElem8x8:
2515       case Iop_SetElem16x4:
2516       case Iop_SetElem32x2:
2517          complainIfUndefined(mce, atom2, NULL);
2518          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2519       default:
2520          ppIROp(op);
2521          VG_(tool_panic)("memcheck:expr2vbits_Triop");
2522    }
2523 }
2524 
2525 
2526 static
2527 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2528                            IROp op,
2529                            IRAtom* atom1, IRAtom* atom2 )
2530 {
2531    IRType  and_or_ty;
2532    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2533    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2534    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2535 
2536    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2537    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2538 
2539    tl_assert(isOriginalAtom(mce,atom1));
2540    tl_assert(isOriginalAtom(mce,atom2));
2541    tl_assert(isShadowAtom(mce,vatom1));
2542    tl_assert(isShadowAtom(mce,vatom2));
2543    tl_assert(sameKindedAtoms(atom1,vatom1));
2544    tl_assert(sameKindedAtoms(atom2,vatom2));
2545    switch (op) {
2546 
2547       /* 32-bit SIMD */
2548 
2549       case Iop_Add16x2:
2550       case Iop_HAdd16Ux2:
2551       case Iop_HAdd16Sx2:
2552       case Iop_Sub16x2:
2553       case Iop_HSub16Ux2:
2554       case Iop_HSub16Sx2:
2555       case Iop_QAdd16Sx2:
2556       case Iop_QSub16Sx2:
2557       case Iop_QSub16Ux2:
2558          return binary16Ix2(mce, vatom1, vatom2);
2559 
2560       case Iop_Add8x4:
2561       case Iop_HAdd8Ux4:
2562       case Iop_HAdd8Sx4:
2563       case Iop_Sub8x4:
2564       case Iop_HSub8Ux4:
2565       case Iop_HSub8Sx4:
2566       case Iop_QSub8Ux4:
2567       case Iop_QAdd8Ux4:
2568       case Iop_QSub8Sx4:
2569       case Iop_QAdd8Sx4:
2570          return binary8Ix4(mce, vatom1, vatom2);
2571 
2572       /* 64-bit SIMD */
2573 
2574       case Iop_ShrN8x8:
2575       case Iop_ShrN16x4:
2576       case Iop_ShrN32x2:
2577       case Iop_SarN8x8:
2578       case Iop_SarN16x4:
2579       case Iop_SarN32x2:
2580       case Iop_ShlN16x4:
2581       case Iop_ShlN32x2:
2582       case Iop_ShlN8x8:
2583          /* Same scheme as with all other shifts. */
2584          complainIfUndefined(mce, atom2, NULL);
2585          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2586 
2587       case Iop_QNarrowBin32Sto16Sx4:
2588       case Iop_QNarrowBin16Sto8Sx8:
2589       case Iop_QNarrowBin16Sto8Ux8:
2590          return vectorNarrowBin64(mce, op, vatom1, vatom2);
2591 
2592       case Iop_Min8Ux8:
2593       case Iop_Min8Sx8:
2594       case Iop_Max8Ux8:
2595       case Iop_Max8Sx8:
2596       case Iop_Avg8Ux8:
2597       case Iop_QSub8Sx8:
2598       case Iop_QSub8Ux8:
2599       case Iop_Sub8x8:
2600       case Iop_CmpGT8Sx8:
2601       case Iop_CmpGT8Ux8:
2602       case Iop_CmpEQ8x8:
2603       case Iop_QAdd8Sx8:
2604       case Iop_QAdd8Ux8:
2605       case Iop_QSal8x8:
2606       case Iop_QShl8x8:
2607       case Iop_Add8x8:
2608       case Iop_Mul8x8:
2609       case Iop_PolynomialMul8x8:
2610          return binary8Ix8(mce, vatom1, vatom2);
2611 
2612       case Iop_Min16Sx4:
2613       case Iop_Min16Ux4:
2614       case Iop_Max16Sx4:
2615       case Iop_Max16Ux4:
2616       case Iop_Avg16Ux4:
2617       case Iop_QSub16Ux4:
2618       case Iop_QSub16Sx4:
2619       case Iop_Sub16x4:
2620       case Iop_Mul16x4:
2621       case Iop_MulHi16Sx4:
2622       case Iop_MulHi16Ux4:
2623       case Iop_CmpGT16Sx4:
2624       case Iop_CmpGT16Ux4:
2625       case Iop_CmpEQ16x4:
2626       case Iop_QAdd16Sx4:
2627       case Iop_QAdd16Ux4:
2628       case Iop_QSal16x4:
2629       case Iop_QShl16x4:
2630       case Iop_Add16x4:
2631       case Iop_QDMulHi16Sx4:
2632       case Iop_QRDMulHi16Sx4:
2633          return binary16Ix4(mce, vatom1, vatom2);
2634 
2635       case Iop_Sub32x2:
2636       case Iop_Mul32x2:
2637       case Iop_Max32Sx2:
2638       case Iop_Max32Ux2:
2639       case Iop_Min32Sx2:
2640       case Iop_Min32Ux2:
2641       case Iop_CmpGT32Sx2:
2642       case Iop_CmpGT32Ux2:
2643       case Iop_CmpEQ32x2:
2644       case Iop_Add32x2:
2645       case Iop_QAdd32Ux2:
2646       case Iop_QAdd32Sx2:
2647       case Iop_QSub32Ux2:
2648       case Iop_QSub32Sx2:
2649       case Iop_QSal32x2:
2650       case Iop_QShl32x2:
2651       case Iop_QDMulHi32Sx2:
2652       case Iop_QRDMulHi32Sx2:
2653          return binary32Ix2(mce, vatom1, vatom2);
2654 
2655       case Iop_QSub64Ux1:
2656       case Iop_QSub64Sx1:
2657       case Iop_QAdd64Ux1:
2658       case Iop_QAdd64Sx1:
2659       case Iop_QSal64x1:
2660       case Iop_QShl64x1:
2661       case Iop_Sal64x1:
2662          return binary64Ix1(mce, vatom1, vatom2);
2663 
2664       case Iop_QShlN8Sx8:
2665       case Iop_QShlN8x8:
2666       case Iop_QSalN8x8:
2667          complainIfUndefined(mce, atom2, NULL);
2668          return mkPCast8x8(mce, vatom1);
2669 
2670       case Iop_QShlN16Sx4:
2671       case Iop_QShlN16x4:
2672       case Iop_QSalN16x4:
2673          complainIfUndefined(mce, atom2, NULL);
2674          return mkPCast16x4(mce, vatom1);
2675 
2676       case Iop_QShlN32Sx2:
2677       case Iop_QShlN32x2:
2678       case Iop_QSalN32x2:
2679          complainIfUndefined(mce, atom2, NULL);
2680          return mkPCast32x2(mce, vatom1);
2681 
2682       case Iop_QShlN64Sx1:
2683       case Iop_QShlN64x1:
2684       case Iop_QSalN64x1:
2685          complainIfUndefined(mce, atom2, NULL);
2686          return mkPCast32x2(mce, vatom1);
2687 
2688       case Iop_PwMax32Sx2:
2689       case Iop_PwMax32Ux2:
2690       case Iop_PwMin32Sx2:
2691       case Iop_PwMin32Ux2:
2692       case Iop_PwMax32Fx2:
2693       case Iop_PwMin32Fx2:
2694          return assignNew('V', mce, Ity_I64,
2695                           binop(Iop_PwMax32Ux2,
2696                                 mkPCast32x2(mce, vatom1),
2697                                 mkPCast32x2(mce, vatom2)));
2698 
2699       case Iop_PwMax16Sx4:
2700       case Iop_PwMax16Ux4:
2701       case Iop_PwMin16Sx4:
2702       case Iop_PwMin16Ux4:
2703          return assignNew('V', mce, Ity_I64,
2704                           binop(Iop_PwMax16Ux4,
2705                                 mkPCast16x4(mce, vatom1),
2706                                 mkPCast16x4(mce, vatom2)));
2707 
2708       case Iop_PwMax8Sx8:
2709       case Iop_PwMax8Ux8:
2710       case Iop_PwMin8Sx8:
2711       case Iop_PwMin8Ux8:
2712          return assignNew('V', mce, Ity_I64,
2713                           binop(Iop_PwMax8Ux8,
2714                                 mkPCast8x8(mce, vatom1),
2715                                 mkPCast8x8(mce, vatom2)));
2716 
2717       case Iop_PwAdd32x2:
2718       case Iop_PwAdd32Fx2:
2719          return mkPCast32x2(mce,
2720                assignNew('V', mce, Ity_I64,
2721                          binop(Iop_PwAdd32x2,
2722                                mkPCast32x2(mce, vatom1),
2723                                mkPCast32x2(mce, vatom2))));
2724 
2725       case Iop_PwAdd16x4:
2726          return mkPCast16x4(mce,
2727                assignNew('V', mce, Ity_I64,
2728                          binop(op, mkPCast16x4(mce, vatom1),
2729                                    mkPCast16x4(mce, vatom2))));
2730 
2731       case Iop_PwAdd8x8:
2732          return mkPCast8x8(mce,
2733                assignNew('V', mce, Ity_I64,
2734                          binop(op, mkPCast8x8(mce, vatom1),
2735                                    mkPCast8x8(mce, vatom2))));
2736 
2737       case Iop_Shl8x8:
2738       case Iop_Shr8x8:
2739       case Iop_Sar8x8:
2740       case Iop_Sal8x8:
2741          return mkUifU64(mce,
2742                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2743                    mkPCast8x8(mce,vatom2)
2744                 );
2745 
2746       case Iop_Shl16x4:
2747       case Iop_Shr16x4:
2748       case Iop_Sar16x4:
2749       case Iop_Sal16x4:
2750          return mkUifU64(mce,
2751                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2752                    mkPCast16x4(mce,vatom2)
2753                 );
2754 
2755       case Iop_Shl32x2:
2756       case Iop_Shr32x2:
2757       case Iop_Sar32x2:
2758       case Iop_Sal32x2:
2759          return mkUifU64(mce,
2760                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2761                    mkPCast32x2(mce,vatom2)
2762                 );
2763 
2764       /* 64-bit data-steering */
2765       case Iop_InterleaveLO32x2:
2766       case Iop_InterleaveLO16x4:
2767       case Iop_InterleaveLO8x8:
2768       case Iop_InterleaveHI32x2:
2769       case Iop_InterleaveHI16x4:
2770       case Iop_InterleaveHI8x8:
2771       case Iop_CatOddLanes8x8:
2772       case Iop_CatEvenLanes8x8:
2773       case Iop_CatOddLanes16x4:
2774       case Iop_CatEvenLanes16x4:
2775       case Iop_InterleaveOddLanes8x8:
2776       case Iop_InterleaveEvenLanes8x8:
2777       case Iop_InterleaveOddLanes16x4:
2778       case Iop_InterleaveEvenLanes16x4:
2779          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2780 
2781       case Iop_GetElem8x8:
2782          complainIfUndefined(mce, atom2, NULL);
2783          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2784       case Iop_GetElem16x4:
2785          complainIfUndefined(mce, atom2, NULL);
2786          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2787       case Iop_GetElem32x2:
2788          complainIfUndefined(mce, atom2, NULL);
2789          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2790 
2791       /* Perm8x8: rearrange values in left arg using steering values
2792         from right arg.  So rearrange the vbits in the same way but
2793         pessimise wrt steering values. */
2794       case Iop_Perm8x8:
2795          return mkUifU64(
2796                    mce,
2797                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2798                    mkPCast8x8(mce, vatom2)
2799                 );
2800 
2801       /* V128-bit SIMD */
2802 
2803       case Iop_ShrN8x16:
2804       case Iop_ShrN16x8:
2805       case Iop_ShrN32x4:
2806       case Iop_ShrN64x2:
2807       case Iop_SarN8x16:
2808       case Iop_SarN16x8:
2809       case Iop_SarN32x4:
2810       case Iop_SarN64x2:
2811       case Iop_ShlN8x16:
2812       case Iop_ShlN16x8:
2813       case Iop_ShlN32x4:
2814       case Iop_ShlN64x2:
2815          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2816             this is wrong now, scalar shifts are done properly lazily.
2817             Vector shifts should be fixed too. */
2818          complainIfUndefined(mce, atom2, NULL);
2819          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2820 
2821       /* V x V shifts/rotates are done using the standard lazy scheme. */
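      /* That is, for (aa shift-by bb) we apply the op to the left
         shadow and the original per-lane shift amounts, then UifU in a
         per-lane PCast of the right shadow: an undefined shift count
         poisons only its own result lane, rather than provoking an
         eager complaint as the immediate-count shifts above do. */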
2822       case Iop_Shl8x16:
2823       case Iop_Shr8x16:
2824       case Iop_Sar8x16:
2825       case Iop_Sal8x16:
2826       case Iop_Rol8x16:
2827          return mkUifUV128(mce,
2828                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2829                    mkPCast8x16(mce,vatom2)
2830                 );
2831 
2832       case Iop_Shl16x8:
2833       case Iop_Shr16x8:
2834       case Iop_Sar16x8:
2835       case Iop_Sal16x8:
2836       case Iop_Rol16x8:
2837          return mkUifUV128(mce,
2838                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2839                    mkPCast16x8(mce,vatom2)
2840                 );
2841 
2842       case Iop_Shl32x4:
2843       case Iop_Shr32x4:
2844       case Iop_Sar32x4:
2845       case Iop_Sal32x4:
2846       case Iop_Rol32x4:
2847          return mkUifUV128(mce,
2848                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2849                    mkPCast32x4(mce,vatom2)
2850                 );
2851 
2852       case Iop_Shl64x2:
2853       case Iop_Shr64x2:
2854       case Iop_Sar64x2:
2855       case Iop_Sal64x2:
2856          return mkUifUV128(mce,
2857                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2858                    mkPCast64x2(mce,vatom2)
2859                 );
2860 
2861       case Iop_F32ToFixed32Ux4_RZ:
2862       case Iop_F32ToFixed32Sx4_RZ:
2863       case Iop_Fixed32UToF32x4_RN:
2864       case Iop_Fixed32SToF32x4_RN:
2865          complainIfUndefined(mce, atom2, NULL);
2866          return mkPCast32x4(mce, vatom1);
2867 
2868       case Iop_F32ToFixed32Ux2_RZ:
2869       case Iop_F32ToFixed32Sx2_RZ:
2870       case Iop_Fixed32UToF32x2_RN:
2871       case Iop_Fixed32SToF32x2_RN:
2872          complainIfUndefined(mce, atom2, NULL);
2873          return mkPCast32x2(mce, vatom1);
2874 
2875       case Iop_QSub8Ux16:
2876       case Iop_QSub8Sx16:
2877       case Iop_Sub8x16:
2878       case Iop_Min8Ux16:
2879       case Iop_Min8Sx16:
2880       case Iop_Max8Ux16:
2881       case Iop_Max8Sx16:
2882       case Iop_CmpGT8Sx16:
2883       case Iop_CmpGT8Ux16:
2884       case Iop_CmpEQ8x16:
2885       case Iop_Avg8Ux16:
2886       case Iop_Avg8Sx16:
2887       case Iop_QAdd8Ux16:
2888       case Iop_QAdd8Sx16:
2889       case Iop_QSal8x16:
2890       case Iop_QShl8x16:
2891       case Iop_Add8x16:
2892       case Iop_Mul8x16:
2893       case Iop_PolynomialMul8x16:
2894          return binary8Ix16(mce, vatom1, vatom2);
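
      /* binary8Ix16 and its 16/32/64-bit siblings used below are defined
         earlier in this file; roughly, they UifU the two operand shadows
         and then PCast each lane, so a result lane is treated as wholly
         undefined if either operand's corresponding lane contains any
         undefined bit.  That is deliberately pessimistic for ops such as
         Add8x16, where per-lane carry propagation is hard to model
         precisely, but it is cheap and never under-reports. */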
2895 
2896       case Iop_QSub16Ux8:
2897       case Iop_QSub16Sx8:
2898       case Iop_Sub16x8:
2899       case Iop_Mul16x8:
2900       case Iop_MulHi16Sx8:
2901       case Iop_MulHi16Ux8:
2902       case Iop_Min16Sx8:
2903       case Iop_Min16Ux8:
2904       case Iop_Max16Sx8:
2905       case Iop_Max16Ux8:
2906       case Iop_CmpGT16Sx8:
2907       case Iop_CmpGT16Ux8:
2908       case Iop_CmpEQ16x8:
2909       case Iop_Avg16Ux8:
2910       case Iop_Avg16Sx8:
2911       case Iop_QAdd16Ux8:
2912       case Iop_QAdd16Sx8:
2913       case Iop_QSal16x8:
2914       case Iop_QShl16x8:
2915       case Iop_Add16x8:
2916       case Iop_QDMulHi16Sx8:
2917       case Iop_QRDMulHi16Sx8:
2918          return binary16Ix8(mce, vatom1, vatom2);
2919 
2920       case Iop_Sub32x4:
2921       case Iop_CmpGT32Sx4:
2922       case Iop_CmpGT32Ux4:
2923       case Iop_CmpEQ32x4:
2924       case Iop_QAdd32Sx4:
2925       case Iop_QAdd32Ux4:
2926       case Iop_QSub32Sx4:
2927       case Iop_QSub32Ux4:
2928       case Iop_QSal32x4:
2929       case Iop_QShl32x4:
2930       case Iop_Avg32Ux4:
2931       case Iop_Avg32Sx4:
2932       case Iop_Add32x4:
2933       case Iop_Max32Ux4:
2934       case Iop_Max32Sx4:
2935       case Iop_Min32Ux4:
2936       case Iop_Min32Sx4:
2937       case Iop_Mul32x4:
2938       case Iop_QDMulHi32Sx4:
2939       case Iop_QRDMulHi32Sx4:
2940          return binary32Ix4(mce, vatom1, vatom2);
2941 
2942       case Iop_Sub64x2:
2943       case Iop_Add64x2:
2944       case Iop_CmpEQ64x2:
2945       case Iop_CmpGT64Sx2:
2946       case Iop_QSal64x2:
2947       case Iop_QShl64x2:
2948       case Iop_QAdd64Ux2:
2949       case Iop_QAdd64Sx2:
2950       case Iop_QSub64Ux2:
2951       case Iop_QSub64Sx2:
2952          return binary64Ix2(mce, vatom1, vatom2);
2953 
2954       case Iop_QNarrowBin32Sto16Sx8:
2955       case Iop_QNarrowBin32Uto16Ux8:
2956       case Iop_QNarrowBin32Sto16Ux8:
2957       case Iop_QNarrowBin16Sto8Sx16:
2958       case Iop_QNarrowBin16Uto8Ux16:
2959       case Iop_QNarrowBin16Sto8Ux16:
2960          return vectorNarrowBinV128(mce, op, vatom1, vatom2);
2961 
2962       case Iop_Sub64Fx2:
2963       case Iop_Mul64Fx2:
2964       case Iop_Min64Fx2:
2965       case Iop_Max64Fx2:
2966       case Iop_Div64Fx2:
2967       case Iop_CmpLT64Fx2:
2968       case Iop_CmpLE64Fx2:
2969       case Iop_CmpEQ64Fx2:
2970       case Iop_CmpUN64Fx2:
2971       case Iop_Add64Fx2:
2972          return binary64Fx2(mce, vatom1, vatom2);
2973 
2974       case Iop_Sub64F0x2:
2975       case Iop_Mul64F0x2:
2976       case Iop_Min64F0x2:
2977       case Iop_Max64F0x2:
2978       case Iop_Div64F0x2:
2979       case Iop_CmpLT64F0x2:
2980       case Iop_CmpLE64F0x2:
2981       case Iop_CmpEQ64F0x2:
2982       case Iop_CmpUN64F0x2:
2983       case Iop_Add64F0x2:
2984          return binary64F0x2(mce, vatom1, vatom2);
2985 
2986       case Iop_Sub32Fx4:
2987       case Iop_Mul32Fx4:
2988       case Iop_Min32Fx4:
2989       case Iop_Max32Fx4:
2990       case Iop_Div32Fx4:
2991       case Iop_CmpLT32Fx4:
2992       case Iop_CmpLE32Fx4:
2993       case Iop_CmpEQ32Fx4:
2994       case Iop_CmpUN32Fx4:
2995       case Iop_CmpGT32Fx4:
2996       case Iop_CmpGE32Fx4:
2997       case Iop_Add32Fx4:
2998       case Iop_Recps32Fx4:
2999       case Iop_Rsqrts32Fx4:
3000          return binary32Fx4(mce, vatom1, vatom2);
3001 
3002       case Iop_Sub32Fx2:
3003       case Iop_Mul32Fx2:
3004       case Iop_Min32Fx2:
3005       case Iop_Max32Fx2:
3006       case Iop_CmpEQ32Fx2:
3007       case Iop_CmpGT32Fx2:
3008       case Iop_CmpGE32Fx2:
3009       case Iop_Add32Fx2:
3010       case Iop_Recps32Fx2:
3011       case Iop_Rsqrts32Fx2:
3012          return binary32Fx2(mce, vatom1, vatom2);
3013 
3014       case Iop_Sub32F0x4:
3015       case Iop_Mul32F0x4:
3016       case Iop_Min32F0x4:
3017       case Iop_Max32F0x4:
3018       case Iop_Div32F0x4:
3019       case Iop_CmpLT32F0x4:
3020       case Iop_CmpLE32F0x4:
3021       case Iop_CmpEQ32F0x4:
3022       case Iop_CmpUN32F0x4:
3023       case Iop_Add32F0x4:
3024          return binary32F0x4(mce, vatom1, vatom2);
3025 
3026       case Iop_QShlN8Sx16:
3027       case Iop_QShlN8x16:
3028       case Iop_QSalN8x16:
3029          complainIfUndefined(mce, atom2, NULL);
3030          return mkPCast8x16(mce, vatom1);
3031 
3032       case Iop_QShlN16Sx8:
3033       case Iop_QShlN16x8:
3034       case Iop_QSalN16x8:
3035          complainIfUndefined(mce, atom2, NULL);
3036          return mkPCast16x8(mce, vatom1);
3037 
3038       case Iop_QShlN32Sx4:
3039       case Iop_QShlN32x4:
3040       case Iop_QSalN32x4:
3041          complainIfUndefined(mce, atom2, NULL);
3042          return mkPCast32x4(mce, vatom1);
3043 
3044       case Iop_QShlN64Sx2:
3045       case Iop_QShlN64x2:
3046       case Iop_QSalN64x2:
3047          complainIfUndefined(mce, atom2, NULL);
3048          return mkPCast64x2(mce, vatom1);
3049 
3050       case Iop_Mull32Sx2:
3051       case Iop_Mull32Ux2:
3052       case Iop_QDMulLong32Sx2:
3053          return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3054                                     mkUifU64(mce, vatom1, vatom2));
3055 
3056       case Iop_Mull16Sx4:
3057       case Iop_Mull16Ux4:
3058       case Iop_QDMulLong16Sx4:
3059          return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3060                                     mkUifU64(mce, vatom1, vatom2));
3061 
3062       case Iop_Mull8Sx8:
3063       case Iop_Mull8Ux8:
3064       case Iop_PolynomialMull8x8:
3065          return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3066                                     mkUifU64(mce, vatom1, vatom2));
3067 
3068       case Iop_PwAdd32x4:
3069          return mkPCast32x4(mce,
3070                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3071                      mkPCast32x4(mce, vatom2))));
3072 
3073       case Iop_PwAdd16x8:
3074          return mkPCast16x8(mce,
3075                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3076                      mkPCast16x8(mce, vatom2))));
3077 
3078       case Iop_PwAdd8x16:
3079          return mkPCast8x16(mce,
3080                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3081                      mkPCast8x16(mce, vatom2))));
3082 
3083       /* V128-bit data-steering */
3084       case Iop_SetV128lo32:
3085       case Iop_SetV128lo64:
3086       case Iop_64HLtoV128:
3087       case Iop_InterleaveLO64x2:
3088       case Iop_InterleaveLO32x4:
3089       case Iop_InterleaveLO16x8:
3090       case Iop_InterleaveLO8x16:
3091       case Iop_InterleaveHI64x2:
3092       case Iop_InterleaveHI32x4:
3093       case Iop_InterleaveHI16x8:
3094       case Iop_InterleaveHI8x16:
3095       case Iop_CatOddLanes8x16:
3096       case Iop_CatOddLanes16x8:
3097       case Iop_CatOddLanes32x4:
3098       case Iop_CatEvenLanes8x16:
3099       case Iop_CatEvenLanes16x8:
3100       case Iop_CatEvenLanes32x4:
3101       case Iop_InterleaveOddLanes8x16:
3102       case Iop_InterleaveOddLanes16x8:
3103       case Iop_InterleaveOddLanes32x4:
3104       case Iop_InterleaveEvenLanes8x16:
3105       case Iop_InterleaveEvenLanes16x8:
3106       case Iop_InterleaveEvenLanes32x4:
3107          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3108 
3109       case Iop_GetElem8x16:
3110          complainIfUndefined(mce, atom2, NULL);
3111          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3112       case Iop_GetElem16x8:
3113          complainIfUndefined(mce, atom2, NULL);
3114          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3115       case Iop_GetElem32x4:
3116          complainIfUndefined(mce, atom2, NULL);
3117          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3118       case Iop_GetElem64x2:
3119          complainIfUndefined(mce, atom2, NULL);
3120          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3121 
3122      /* Perm8x16: rearrange values in left arg using steering values
3123         from right arg.  So rearrange the vbits in the same way but
3124         pessimise wrt steering values.  Perm32x4 ditto. */
3125       case Iop_Perm8x16:
3126          return mkUifUV128(
3127                    mce,
3128                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3129                    mkPCast8x16(mce, vatom2)
3130                 );
3131       case Iop_Perm32x4:
3132          return mkUifUV128(
3133                    mce,
3134                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3135                    mkPCast32x4(mce, vatom2)
3136                 );
3137 
3138      /* These two take the lower (even) 16-bit half of each 32-bit
3139         lane, sign/zero extend it to 32 bits, and multiply the halves
3140         together, producing a 32x4 result (and implicitly ignoring
3141         half the operand bits).  So treat it as a bunch of independent
3142         16x8 operations, but then do 32-bit shifts left-right to copy
3143         the lower half results (all 0s or all 1s due to PCasting in
3144         binary16Ix8) into the upper half of each result lane. */
3145       case Iop_MullEven16Ux8:
3146       case Iop_MullEven16Sx8: {
3147          IRAtom* at;
3148          at = binary16Ix8(mce,vatom1,vatom2);
3149          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3150          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3151          return at;
3152       }
3153 
3154       /* Same deal as Iop_MullEven16{S,U}x8 */
3155       case Iop_MullEven8Ux16:
3156       case Iop_MullEven8Sx16: {
3157          IRAtom* at;
3158          at = binary8Ix16(mce,vatom1,vatom2);
3159          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3160          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3161          return at;
3162       }
3163 
3164       /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3165          32x4 -> 16x8 laneage, discarding the upper half of each lane.
3166          Simply apply the same op to the V bits, since this is really
3167          no more than a data steering operation. */
3168       case Iop_NarrowBin32to16x8:
3169       case Iop_NarrowBin16to8x16:
3170          return assignNew('V', mce, Ity_V128,
3171                                     binop(op, vatom1, vatom2));
3172 
3173       case Iop_ShrV128:
3174       case Iop_ShlV128:
3175          /* Same scheme as with all other shifts.  Note: 10 Nov 05:
3176             this is wrong now, scalar shifts are done properly lazily.
3177             Vector shifts should be fixed too. */
3178          complainIfUndefined(mce, atom2, NULL);
3179          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3180 
3181       /* I128-bit data-steering */
3182       case Iop_64HLto128:
3183          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3184 
3185       /* V256-bit SIMD */
3186 
3187       case Iop_Add64Fx4:
3188       case Iop_Sub64Fx4:
3189       case Iop_Mul64Fx4:
3190       case Iop_Div64Fx4:
3191       case Iop_Max64Fx4:
3192       case Iop_Min64Fx4:
3193          return binary64Fx4(mce, vatom1, vatom2);
3194 
3195       case Iop_Add32Fx8:
3196       case Iop_Sub32Fx8:
3197       case Iop_Mul32Fx8:
3198       case Iop_Div32Fx8:
3199       case Iop_Max32Fx8:
3200       case Iop_Min32Fx8:
3201          return binary32Fx8(mce, vatom1, vatom2);
3202 
3203       /* V256-bit data-steering */
3204       case Iop_V128HLtoV256:
3205          return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3206 
3207       /* Scalar floating point */
3208 
3209       case Iop_F32toI64S:
3210          /* I32(rm) x F32 -> I64 */
3211          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3212 
3213       case Iop_I64StoF32:
3214          /* I32(rm) x I64 -> F32 */
3215          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3216 
3217       case Iop_RoundF64toInt:
3218       case Iop_RoundF64toF32:
3219       case Iop_F64toI64S:
3220       case Iop_F64toI64U:
3221       case Iop_I64StoF64:
3222       case Iop_I64UtoF64:
3223       case Iop_SinF64:
3224       case Iop_CosF64:
3225       case Iop_TanF64:
3226       case Iop_2xm1F64:
3227       case Iop_SqrtF64:
3228          /* I32(rm) x I64/F64 -> I64/F64 */
3229          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3230 
3231       case Iop_ShlD64:
3232       case Iop_ShrD64:
3233       case Iop_RoundD64toInt:
3234          /* I32(DFP rm) x D64 -> D64 */
3235          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3236 
3237       case Iop_ShlD128:
3238       case Iop_ShrD128:
3239       case Iop_RoundD128toInt:
3240          /* I32(DFP rm) x D128 -> D128 */
3241          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3242 
3243       case Iop_D64toI64S:
3244       case Iop_I64StoD64:
3245          /* I64(DFP rm) x I64 -> D64 */
3246          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3247 
3248       case Iop_RoundF32toInt:
3249       case Iop_SqrtF32:
3250          /* I32(rm) x I32/F32 -> I32/F32 */
3251          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3252 
3253       case Iop_SqrtF128:
3254          /* I32(rm) x F128 -> F128 */
3255          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3256 
3257       case Iop_I32StoF32:
3258       case Iop_F32toI32S:
3259          /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3260          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3261 
3262       case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3263       case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3264          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3265 
3266       case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3267       case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3268       case Iop_D128toD64:  /* IRRoundingModeDFP(I64) x D128 -> D64 */
3269       case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64  */
3270          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3271 
3272       case Iop_F64HLtoF128:
3273       case Iop_D64HLtoD128:
3274          return assignNew('V', mce, Ity_I128,
3275                           binop(Iop_64HLto128, vatom1, vatom2));
3276 
3277       case Iop_F64toI32U:
3278       case Iop_F64toI32S:
3279       case Iop_F64toF32:
3280       case Iop_I64UtoF32:
3281          /* First arg is I32 (rounding mode), second is F64 (data). */
3282          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3283 
3284       case Iop_D64toD32:
3285          /* First arg is I64 (DFP rounding mode), second is D64 (data). */
3286          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3287 
3288       case Iop_F64toI16S:
3289          /* First arg is I32 (rounding mode), second is F64 (data). */
3290          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3291 
3292       case Iop_InsertExpD64:
3293          /*  I64 x I64 -> D64 */
3294          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3295 
3296       case Iop_InsertExpD128:
3297          /*  I64 x I128 -> D128 */
3298          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3299 
3300       case Iop_CmpF32:
3301       case Iop_CmpF64:
3302       case Iop_CmpF128:
3303       case Iop_CmpD64:
3304       case Iop_CmpD128:
3305          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3306 
3307       /* non-FP after here */
3308 
3309       case Iop_DivModU64to32:
3310       case Iop_DivModS64to32:
3311          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3312 
3313       case Iop_DivModU128to64:
3314       case Iop_DivModS128to64:
3315          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3316 
3317       case Iop_16HLto32:
3318          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3319       case Iop_32HLto64:
3320          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3321 
3322       case Iop_DivModS64to64:
3323       case Iop_MullS64:
3324       case Iop_MullU64: {
3325          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3326          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3327          return assignNew('V', mce, Ity_I128,
3328                           binop(Iop_64HLto128, vHi64, vLo64));
3329       }
3330 
3331       case Iop_MullS32:
3332       case Iop_MullU32: {
3333          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3334          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3335          return assignNew('V', mce, Ity_I64,
3336                           binop(Iop_32HLto64, vHi32, vLo32));
3337       }
3338 
3339       case Iop_MullS16:
3340       case Iop_MullU16: {
3341          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3342          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3343          return assignNew('V', mce, Ity_I32,
3344                           binop(Iop_16HLto32, vHi16, vLo16));
3345       }
3346 
3347       case Iop_MullS8:
3348       case Iop_MullU8: {
3349          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3350          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3351          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3352       }
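
      /* In outline, the four widening multiplies above share one recipe:
         the low half of the double-width result takes the Left-rule
         shadow of UifU(vatom1, vatom2), so undefinedness spreads upwards
         from the lowest undefined operand bit just as for Add/Mul, and
         the high half is a PCast of that low-half shadow, i.e. entirely
         undefined as soon as the low half is at all undefined.  The two
         halves are then reassembled with the matching HLto op. */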
3353 
3354       case Iop_Sad8Ux4: /* maybe we could do better?  For the moment, do mkLazy2. */
3355       case Iop_DivS32:
3356       case Iop_DivU32:
3357       case Iop_DivU32E:
3358       case Iop_DivS32E:
3359       case Iop_QAdd32S: /* could probably do better */
3360       case Iop_QSub32S: /* could probably do better */
3361          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3362 
3363       case Iop_DivS64:
3364       case Iop_DivU64:
3365       case Iop_DivS64E:
3366       case Iop_DivU64E:
3367          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3368 
3369       case Iop_Add32:
3370          if (mce->bogusLiterals || mce->useLLVMworkarounds)
3371             return expensiveAddSub(mce,True,Ity_I32,
3372                                    vatom1,vatom2, atom1,atom2);
3373          else
3374             goto cheap_AddSub32;
3375       case Iop_Sub32:
3376          if (mce->bogusLiterals)
3377             return expensiveAddSub(mce,False,Ity_I32,
3378                                    vatom1,vatom2, atom1,atom2);
3379          else
3380             goto cheap_AddSub32;
3381 
3382       cheap_AddSub32:
3383       case Iop_Mul32:
3384          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
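
      /* The "cheap" scheme relies on mkLeft32/mkLeft64 (defined earlier
         in this file), which -- roughly speaking -- compute v | -v on the
         shadow word: every bit at or above the lowest undefined bit is
         marked undefined, modelling the fact that carries in an add,
         subtract or multiply only propagate towards the more significant
         end.  A small illustrative example for the 32-bit case:
             v      = 0x00000010   (only bit 4 undefined)
             -v     = 0xFFFFFFF0
             v | -v = 0xFFFFFFF0   (bits 4..31 undefined, bits 0..3 ok) */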
3385 
3386       case Iop_CmpORD32S:
3387       case Iop_CmpORD32U:
3388       case Iop_CmpORD64S:
3389       case Iop_CmpORD64U:
3390          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3391 
3392       case Iop_Add64:
3393          if (mce->bogusLiterals || mce->useLLVMworkarounds)
3394             return expensiveAddSub(mce,True,Ity_I64,
3395                                    vatom1,vatom2, atom1,atom2);
3396          else
3397             goto cheap_AddSub64;
3398       case Iop_Sub64:
3399          if (mce->bogusLiterals)
3400             return expensiveAddSub(mce,False,Ity_I64,
3401                                    vatom1,vatom2, atom1,atom2);
3402          else
3403             goto cheap_AddSub64;
3404 
3405       cheap_AddSub64:
3406       case Iop_Mul64:
3407          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3408 
3409       case Iop_Mul16:
3410       case Iop_Add16:
3411       case Iop_Sub16:
3412          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3413 
3414       case Iop_Sub8:
3415       case Iop_Add8:
3416          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3417 
3418       case Iop_CmpEQ64:
3419       case Iop_CmpNE64:
3420          if (mce->bogusLiterals)
3421             return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3422          else
3423             goto cheap_cmp64;
3424       cheap_cmp64:
3425       case Iop_CmpLE64S: case Iop_CmpLE64U:
3426       case Iop_CmpLT64U: case Iop_CmpLT64S:
3427          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3428 
3429       case Iop_CmpEQ32:
3430       case Iop_CmpNE32:
3431          if (mce->bogusLiterals)
3432             return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3433          else
3434             goto cheap_cmp32;
3435       cheap_cmp32:
3436       case Iop_CmpLE32S: case Iop_CmpLE32U:
3437       case Iop_CmpLT32U: case Iop_CmpLT32S:
3438          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3439 
3440       case Iop_CmpEQ16: case Iop_CmpNE16:
3441          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3442 
3443       case Iop_CmpEQ8: case Iop_CmpNE8:
3444          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
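
      /* For all the cheap comparison cases above, the I1 result shadow
         is just a PCast of UifU(vatom1, vatom2): the outcome is flagged
         undefined if any bit of either operand is undefined.  When
         mce->bogusLiterals is set, expensiveCmpEQorNE (defined earlier
         in this file) is used for EQ/NE instead; roughly, it can still
         report a defined result when the operands' defined bits already
         differ, since the undefined bits then cannot change the answer. */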
3445 
3446       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3447       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3448       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3449       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3450          /* Just say these all produce a defined result, regardless
3451             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3452          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3453 
3454       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3455          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3456 
3457       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3458          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3459 
3460       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3461          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3462 
3463       case Iop_Shl8: case Iop_Shr8:
3464          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
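
      /* scalarShift (defined earlier in this file) handles these lazily:
         roughly, the value's shadow is shifted by the actual shift
         amount and the result is UifU'd with a PCast of the shift
         amount's shadow.  A defined count therefore just steers the
         vbits, while an undefined count poisons the whole result. */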
3465 
3466       case Iop_AndV256:
3467          uifu = mkUifUV256; difd = mkDifDV256;
3468          and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
3469       case Iop_AndV128:
3470          uifu = mkUifUV128; difd = mkDifDV128;
3471          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3472       case Iop_And64:
3473          uifu = mkUifU64; difd = mkDifD64;
3474          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3475       case Iop_And32:
3476          uifu = mkUifU32; difd = mkDifD32;
3477          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3478       case Iop_And16:
3479          uifu = mkUifU16; difd = mkDifD16;
3480          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3481       case Iop_And8:
3482          uifu = mkUifU8; difd = mkDifD8;
3483          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3484 
3485       case Iop_OrV256:
3486          uifu = mkUifUV256; difd = mkDifDV256;
3487          and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
3488       case Iop_OrV128:
3489          uifu = mkUifUV128; difd = mkDifDV128;
3490          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3491       case Iop_Or64:
3492          uifu = mkUifU64; difd = mkDifD64;
3493          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3494       case Iop_Or32:
3495          uifu = mkUifU32; difd = mkDifD32;
3496          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3497       case Iop_Or16:
3498          uifu = mkUifU16; difd = mkDifD16;
3499          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3500       case Iop_Or8:
3501          uifu = mkUifU8; difd = mkDifD8;
3502          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3503 
3504       do_And_Or:
3505          return
3506          assignNew(
3507             'V', mce,
3508             and_or_ty,
3509             difd(mce, uifu(mce, vatom1, vatom2),
3510                       difd(mce, improve(mce, atom1, vatom1),
3511                                 improve(mce, atom2, vatom2) ) ) );
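
      /* The And/Or cases above implement the classic Memcheck
         refinement.  A plain UifU would flag 'undefined AND defined-0'
         and 'undefined OR defined-1' as undefined even though the
         defined operand forces the result.  The improvement terms repair
         this: mkImproveAND* treats a bit as rescued where some operand
         holds a defined 0, mkImproveOR* where some operand holds a
         defined 1, and the surrounding DifD removes those rescued bits
         from the pessimistic UifU result. */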
3512 
3513       case Iop_Xor8:
3514          return mkUifU8(mce, vatom1, vatom2);
3515       case Iop_Xor16:
3516          return mkUifU16(mce, vatom1, vatom2);
3517       case Iop_Xor32:
3518          return mkUifU32(mce, vatom1, vatom2);
3519       case Iop_Xor64:
3520          return mkUifU64(mce, vatom1, vatom2);
3521       case Iop_XorV128:
3522          return mkUifUV128(mce, vatom1, vatom2);
3523       case Iop_XorV256:
3524          return mkUifUV256(mce, vatom1, vatom2);
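
      /* Xor needs no such improvement term: each result bit depends on
         exactly the corresponding pair of operand bits and neither value
         can force it, so a plain UifU of the shadows is the right rule. */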
3525 
3526       default:
3527          ppIROp(op);
3528          VG_(tool_panic)("memcheck:expr2vbits_Binop");
3529    }
3530 }
3531 
3532 
3533 static
3534 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3535 {
3536    IRAtom* vatom = expr2vbits( mce, atom );
3537    tl_assert(isOriginalAtom(mce,atom));
3538    switch (op) {
3539 
3540       case Iop_Sqrt64Fx2:
3541          return unary64Fx2(mce, vatom);
3542 
3543       case Iop_Sqrt64F0x2:
3544          return unary64F0x2(mce, vatom);
3545 
3546       case Iop_Sqrt32Fx8:
3547       case Iop_RSqrt32Fx8:
3548       case Iop_Recip32Fx8:
3549          return unary32Fx8(mce, vatom);
3550 
3551       case Iop_Sqrt64Fx4:
3552          return unary64Fx4(mce, vatom);
3553 
3554       case Iop_Sqrt32Fx4:
3555       case Iop_RSqrt32Fx4:
3556       case Iop_Recip32Fx4:
3557       case Iop_I32UtoFx4:
3558       case Iop_I32StoFx4:
3559       case Iop_QFtoI32Ux4_RZ:
3560       case Iop_QFtoI32Sx4_RZ:
3561       case Iop_RoundF32x4_RM:
3562       case Iop_RoundF32x4_RP:
3563       case Iop_RoundF32x4_RN:
3564       case Iop_RoundF32x4_RZ:
3565       case Iop_Recip32x4:
3566       case Iop_Abs32Fx4:
3567       case Iop_Neg32Fx4:
3568       case Iop_Rsqrte32Fx4:
3569          return unary32Fx4(mce, vatom);
3570 
3571       case Iop_I32UtoFx2:
3572       case Iop_I32StoFx2:
3573       case Iop_Recip32Fx2:
3574       case Iop_Recip32x2:
3575       case Iop_Abs32Fx2:
3576       case Iop_Neg32Fx2:
3577       case Iop_Rsqrte32Fx2:
3578          return unary32Fx2(mce, vatom);
3579 
3580       case Iop_Sqrt32F0x4:
3581       case Iop_RSqrt32F0x4:
3582       case Iop_Recip32F0x4:
3583          return unary32F0x4(mce, vatom);
3584 
3585       case Iop_32UtoV128:
3586       case Iop_64UtoV128:
3587       case Iop_Dup8x16:
3588       case Iop_Dup16x8:
3589       case Iop_Dup32x4:
3590       case Iop_Reverse16_8x16:
3591       case Iop_Reverse32_8x16:
3592       case Iop_Reverse32_16x8:
3593       case Iop_Reverse64_8x16:
3594       case Iop_Reverse64_16x8:
3595       case Iop_Reverse64_32x4:
3596       case Iop_V256toV128_1: case Iop_V256toV128_0:
3597          return assignNew('V', mce, Ity_V128, unop(op, vatom));
3598 
3599       case Iop_F128HItoF64:  /* F128 -> high half of F128 */
3600       case Iop_D128HItoD64:  /* D128 -> high half of D128 */
3601          return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3602       case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
3603       case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
3604          return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3605 
3606       case Iop_NegF128:
3607       case Iop_AbsF128:
3608          return mkPCastTo(mce, Ity_I128, vatom);
3609 
3610       case Iop_I32StoF128: /* signed I32 -> F128 */
3611       case Iop_I64StoF128: /* signed I64 -> F128 */
3612       case Iop_F32toF128:  /* F32 -> F128 */
3613       case Iop_F64toF128:  /* F64 -> F128 */
3614       case Iop_I64StoD128: /* signed I64 -> D128 */
3615          return mkPCastTo(mce, Ity_I128, vatom);
3616 
3617       case Iop_F32toF64:
3618       case Iop_I32StoF64:
3619       case Iop_I32UtoF64:
3620       case Iop_NegF64:
3621       case Iop_AbsF64:
3622       case Iop_Est5FRSqrt:
3623       case Iop_RoundF64toF64_NEAREST:
3624       case Iop_RoundF64toF64_NegINF:
3625       case Iop_RoundF64toF64_PosINF:
3626       case Iop_RoundF64toF64_ZERO:
3627       case Iop_Clz64:
3628       case Iop_Ctz64:
3629       case Iop_D32toD64:
3630       case Iop_ExtractExpD64:    /* D64  -> I64 */
3631       case Iop_ExtractExpD128:   /* D128 -> I64 */
3632       case Iop_DPBtoBCD:
3633       case Iop_BCDtoDPB:
3634          return mkPCastTo(mce, Ity_I64, vatom);
3635 
3636       case Iop_D64toD128:
3637          return mkPCastTo(mce, Ity_I128, vatom);
3638 
3639       case Iop_Clz32:
3640       case Iop_Ctz32:
3641       case Iop_TruncF64asF32:
3642       case Iop_NegF32:
3643       case Iop_AbsF32:
3644          return mkPCastTo(mce, Ity_I32, vatom);
3645 
3646       case Iop_1Uto64:
3647       case Iop_1Sto64:
3648       case Iop_8Uto64:
3649       case Iop_8Sto64:
3650       case Iop_16Uto64:
3651       case Iop_16Sto64:
3652       case Iop_32Sto64:
3653       case Iop_32Uto64:
3654       case Iop_V128to64:
3655       case Iop_V128HIto64:
3656       case Iop_128HIto64:
3657       case Iop_128to64:
3658       case Iop_Dup8x8:
3659       case Iop_Dup16x4:
3660       case Iop_Dup32x2:
3661       case Iop_Reverse16_8x8:
3662       case Iop_Reverse32_8x8:
3663       case Iop_Reverse32_16x4:
3664       case Iop_Reverse64_8x8:
3665       case Iop_Reverse64_16x4:
3666       case Iop_Reverse64_32x2:
3667       case Iop_V256to64_0: case Iop_V256to64_1:
3668       case Iop_V256to64_2: case Iop_V256to64_3:
3669          return assignNew('V', mce, Ity_I64, unop(op, vatom));
3670 
3671       case Iop_I16StoF32:
3672       case Iop_64to32:
3673       case Iop_64HIto32:
3674       case Iop_1Uto32:
3675       case Iop_1Sto32:
3676       case Iop_8Uto32:
3677       case Iop_16Uto32:
3678       case Iop_16Sto32:
3679       case Iop_8Sto32:
3680       case Iop_V128to32:
3681          return assignNew('V', mce, Ity_I32, unop(op, vatom));
3682 
3683       case Iop_8Sto16:
3684       case Iop_8Uto16:
3685       case Iop_32to16:
3686       case Iop_32HIto16:
3687       case Iop_64to16:
3688          return assignNew('V', mce, Ity_I16, unop(op, vatom));
3689 
3690       case Iop_1Uto8:
3691       case Iop_1Sto8:
3692       case Iop_16to8:
3693       case Iop_16HIto8:
3694       case Iop_32to8:
3695       case Iop_64to8:
3696          return assignNew('V', mce, Ity_I8, unop(op, vatom));
3697 
3698       case Iop_32to1:
3699          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3700 
3701       case Iop_64to1:
3702          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3703 
3704       case Iop_ReinterpF64asI64:
3705       case Iop_ReinterpI64asF64:
3706       case Iop_ReinterpI32asF32:
3707       case Iop_ReinterpF32asI32:
3708       case Iop_ReinterpI64asD64:
3709       case Iop_ReinterpD64asI64:
3710       case Iop_NotV256:
3711       case Iop_NotV128:
3712       case Iop_Not64:
3713       case Iop_Not32:
3714       case Iop_Not16:
3715       case Iop_Not8:
3716       case Iop_Not1:
3717          return vatom;
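
      /* All of the above are bit-for-bit reinterpretations, inversions
         or copies of the argument, so the argument's shadow passes
         through unchanged: no bit becomes more or less defined. */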
3718 
3719       case Iop_CmpNEZ8x8:
3720       case Iop_Cnt8x8:
3721       case Iop_Clz8Sx8:
3722       case Iop_Cls8Sx8:
3723       case Iop_Abs8x8:
3724          return mkPCast8x8(mce, vatom);
3725 
3726       case Iop_CmpNEZ8x16:
3727       case Iop_Cnt8x16:
3728       case Iop_Clz8Sx16:
3729       case Iop_Cls8Sx16:
3730       case Iop_Abs8x16:
3731          return mkPCast8x16(mce, vatom);
3732 
3733       case Iop_CmpNEZ16x4:
3734       case Iop_Clz16Sx4:
3735       case Iop_Cls16Sx4:
3736       case Iop_Abs16x4:
3737          return mkPCast16x4(mce, vatom);
3738 
3739       case Iop_CmpNEZ16x8:
3740       case Iop_Clz16Sx8:
3741       case Iop_Cls16Sx8:
3742       case Iop_Abs16x8:
3743          return mkPCast16x8(mce, vatom);
3744 
3745       case Iop_CmpNEZ32x2:
3746       case Iop_Clz32Sx2:
3747       case Iop_Cls32Sx2:
3748       case Iop_FtoI32Ux2_RZ:
3749       case Iop_FtoI32Sx2_RZ:
3750       case Iop_Abs32x2:
3751          return mkPCast32x2(mce, vatom);
3752 
3753       case Iop_CmpNEZ32x4:
3754       case Iop_Clz32Sx4:
3755       case Iop_Cls32Sx4:
3756       case Iop_FtoI32Ux4_RZ:
3757       case Iop_FtoI32Sx4_RZ:
3758       case Iop_Abs32x4:
3759          return mkPCast32x4(mce, vatom);
3760 
3761       case Iop_CmpwNEZ64:
3762          return mkPCastTo(mce, Ity_I64, vatom);
3763 
3764       case Iop_CmpNEZ64x2:
3765          return mkPCast64x2(mce, vatom);
3766 
3767       case Iop_NarrowUn16to8x8:
3768       case Iop_NarrowUn32to16x4:
3769       case Iop_NarrowUn64to32x2:
3770       case Iop_QNarrowUn16Sto8Sx8:
3771       case Iop_QNarrowUn16Sto8Ux8:
3772       case Iop_QNarrowUn16Uto8Ux8:
3773       case Iop_QNarrowUn32Sto16Sx4:
3774       case Iop_QNarrowUn32Sto16Ux4:
3775       case Iop_QNarrowUn32Uto16Ux4:
3776       case Iop_QNarrowUn64Sto32Sx2:
3777       case Iop_QNarrowUn64Sto32Ux2:
3778       case Iop_QNarrowUn64Uto32Ux2:
3779          return vectorNarrowUnV128(mce, op, vatom);
3780 
3781       case Iop_Widen8Sto16x8:
3782       case Iop_Widen8Uto16x8:
3783       case Iop_Widen16Sto32x4:
3784       case Iop_Widen16Uto32x4:
3785       case Iop_Widen32Sto64x2:
3786       case Iop_Widen32Uto64x2:
3787          return vectorWidenI64(mce, op, vatom);
3788 
3789       case Iop_PwAddL32Ux2:
3790       case Iop_PwAddL32Sx2:
3791          return mkPCastTo(mce, Ity_I64,
3792                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3793 
3794       case Iop_PwAddL16Ux4:
3795       case Iop_PwAddL16Sx4:
3796          return mkPCast32x2(mce,
3797                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3798 
3799       case Iop_PwAddL8Ux8:
3800       case Iop_PwAddL8Sx8:
3801          return mkPCast16x4(mce,
3802                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3803 
3804       case Iop_PwAddL32Ux4:
3805       case Iop_PwAddL32Sx4:
3806          return mkPCast64x2(mce,
3807                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3808 
3809       case Iop_PwAddL16Ux8:
3810       case Iop_PwAddL16Sx8:
3811          return mkPCast32x4(mce,
3812                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3813 
3814       case Iop_PwAddL8Ux16:
3815       case Iop_PwAddL8Sx16:
3816          return mkPCast16x8(mce,
3817                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3818 
3819       case Iop_I64UtoF32:
3820       default:
3821          ppIROp(op);
3822          VG_(tool_panic)("memcheck:expr2vbits_Unop");
3823    }
3824 }
3825 
3826 
3827 /* Worker function; do not call directly. */
3828 static
3829 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3830                               IREndness end, IRType ty,
3831                               IRAtom* addr, UInt bias )
3832 {
3833    void*    helper;
3834    Char*    hname;
3835    IRDirty* di;
3836    IRTemp   datavbits;
3837    IRAtom*  addrAct;
3838 
3839    tl_assert(isOriginalAtom(mce,addr));
3840    tl_assert(end == Iend_LE || end == Iend_BE);
3841 
3842    /* First, emit a definedness test for the address.  This also sets
3843       the address (shadow) to 'defined' following the test. */
3844    complainIfUndefined( mce, addr, NULL );
3845 
3846    /* Now cook up a call to the relevant helper function, to read the
3847       data V bits from shadow memory. */
3848    ty = shadowTypeV(ty);
3849 
3850    if (end == Iend_LE) {
3851       switch (ty) {
3852          case Ity_I64: helper = &MC_(helperc_LOADV64le);
3853                        hname = "MC_(helperc_LOADV64le)";
3854                        break;
3855          case Ity_I32: helper = &MC_(helperc_LOADV32le);
3856                        hname = "MC_(helperc_LOADV32le)";
3857                        break;
3858          case Ity_I16: helper = &MC_(helperc_LOADV16le);
3859                        hname = "MC_(helperc_LOADV16le)";
3860                        break;
3861          case Ity_I8:  helper = &MC_(helperc_LOADV8);
3862                        hname = "MC_(helperc_LOADV8)";
3863                        break;
3864          default:      ppIRType(ty);
3865                        VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3866       }
3867    } else {
3868       switch (ty) {
3869          case Ity_I64: helper = &MC_(helperc_LOADV64be);
3870                        hname = "MC_(helperc_LOADV64be)";
3871                        break;
3872          case Ity_I32: helper = &MC_(helperc_LOADV32be);
3873                        hname = "MC_(helperc_LOADV32be)";
3874                        break;
3875          case Ity_I16: helper = &MC_(helperc_LOADV16be);
3876                        hname = "MC_(helperc_LOADV16be)";
3877                        break;
3878          case Ity_I8:  helper = &MC_(helperc_LOADV8);
3879                        hname = "MC_(helperc_LOADV8)";
3880                        break;
3881          default:      ppIRType(ty);
3882                        VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3883       }
3884    }
3885 
3886    /* Generate the actual address into addrAct. */
3887    if (bias == 0) {
3888       addrAct = addr;
3889    } else {
3890       IROp    mkAdd;
3891       IRAtom* eBias;
3892       IRType  tyAddr  = mce->hWordTy;
3893       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3894       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3895       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3896       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3897    }
3898 
3899    /* We need to have a place to park the V bits we're just about to
3900       read. */
3901    datavbits = newTemp(mce, ty, VSh);
3902    di = unsafeIRDirty_1_N( datavbits,
3903                            1/*regparms*/,
3904                            hname, VG_(fnptr_to_fnentry)( helper ),
3905                            mkIRExprVec_1( addrAct ));
3906    setHelperAnns( mce, di );
3907    stmt( 'V', mce, IRStmt_Dirty(di) );
3908 
3909    return mkexpr(datavbits);
3910 }
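
/* To make the above concrete: for a 32-bit little-endian load from
   address 'a' with zero bias, the shadow code emitted is, in outline
   (temporary names here are illustrative, not literal IR dumps):

      check that a's shadow is all-defined, complaining if not, and then
        mark it as defined;
      t_vbits = DIRTY call to MC_(helperc_LOADV32le)(a);

   and the V-bit expression returned to the caller is simply t_vbits. */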
3911 
3912 
3913 static
3914 IRAtom* expr2vbits_Load ( MCEnv* mce,
3915                           IREndness end, IRType ty,
3916                           IRAtom* addr, UInt bias )
3917 {
3918    tl_assert(end == Iend_LE || end == Iend_BE);
3919    switch (shadowTypeV(ty)) {
3920       case Ity_I8:
3921       case Ity_I16:
3922       case Ity_I32:
3923       case Ity_I64:
3924          return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3925       case Ity_V128: {
3926          IRAtom *v64hi, *v64lo;
3927          if (end == Iend_LE) {
3928             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3929             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3930          } else {
3931             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3932             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3933          }
3934          return assignNew( 'V', mce,
3935                            Ity_V128,
3936                            binop(Iop_64HLtoV128, v64hi, v64lo));
3937       }
3938       case Ity_V256: {
3939          /* V256-bit case -- phrased in terms of 64 bit units (Qs),
3940             with Q3 being the most significant lane. */
3941          if (end == Iend_BE) goto unhandled;
3942          IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3943          IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3944          IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
3945          IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
3946          return assignNew( 'V', mce,
3947                            Ity_V256,
3948                            IRExpr_Qop(Iop_64x4toV256,
3949                                       v64Q3, v64Q2, v64Q1, v64Q0));
3950       }
3951       unhandled:
3952       default:
3953          VG_(tool_panic)("expr2vbits_Load");
3954    }
3955 }
3956 
3957 
3958 /* If there is no guard expression, or the guard is always TRUE, this
3959    function behaves like expr2vbits_Load.  If the guard is not true at
3960    runtime, an all-bits-defined bit pattern is returned instead.
3961    It is assumed that the definedness of GUARD has already been checked
3962    at the call site. */
3963 static
3964 IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
3965                                   IREndness end, IRType ty,
3966                                   IRAtom* addr, UInt bias, IRAtom *guard )
3967 {
3968    if (guard) {
3969       IRAtom *cond, *iffalse, *iftrue;
3970 
3971       cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
3972       iftrue  = assignNew('V', mce, ty,
3973                           expr2vbits_Load(mce, end, ty, addr, bias));
3974       iffalse = assignNew('V', mce, ty, definedOfType(ty));
3975 
3976       return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
3977    }
3978 
3979    /* No guard expression or unconditional load */
3980    return expr2vbits_Load(mce, end, ty, addr, bias);
3981 }
3982 
3983 
3984 static
3985 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3986                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3987 {
3988    IRAtom *vbitsC, *vbits0, *vbitsX;
3989    IRType ty;
3990    /* Given Mux0X(cond,expr0,exprX), generate
3991          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3992       That is, steer the V bits like the originals, but trash the
3993       result if the steering value is undefined.  This gives
3994       lazy propagation. */
3995    tl_assert(isOriginalAtom(mce, cond));
3996    tl_assert(isOriginalAtom(mce, expr0));
3997    tl_assert(isOriginalAtom(mce, exprX));
3998 
3999    vbitsC = expr2vbits(mce, cond);
4000    vbits0 = expr2vbits(mce, expr0);
4001    vbitsX = expr2vbits(mce, exprX);
4002    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4003 
4004    return
4005       mkUifU(mce, ty, assignNew('V', mce, ty,
4006                                      IRExpr_Mux0X(cond, vbits0, vbitsX)),
4007                       mkPCastTo(mce, ty, vbitsC) );
4008 }
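
/* A small worked illustration of the Mux0X rule above: if cond is fully
   defined, PCast(cond#) is all zeroes, the UifU is a no-op, and the
   result shadow is exactly the muxed shadows of expr0/exprX.  If any bit
   of cond is undefined, PCast(cond#) is all ones and the entire result
   is reported undefined, whichever arm was selected. */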
4009 
4010 /* --------- This is the main expression-handling function. --------- */
4011 
4012 static
4013 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4014 {
4015    switch (e->tag) {
4016 
4017       case Iex_Get:
4018          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4019 
4020       case Iex_GetI:
4021          return shadow_GETI( mce, e->Iex.GetI.descr,
4022                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
4023 
4024       case Iex_RdTmp:
4025          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4026 
4027       case Iex_Const:
4028          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4029 
4030       case Iex_Qop:
4031          return expr2vbits_Qop(
4032                    mce,
4033                    e->Iex.Qop.details->op,
4034                    e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4035                    e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4036                 );
4037 
4038       case Iex_Triop:
4039          return expr2vbits_Triop(
4040                    mce,
4041                    e->Iex.Triop.details->op,
4042                    e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4043                    e->Iex.Triop.details->arg3
4044                 );
4045 
4046       case Iex_Binop:
4047          return expr2vbits_Binop(
4048                    mce,
4049                    e->Iex.Binop.op,
4050                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
4051                 );
4052 
4053       case Iex_Unop:
4054          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4055 
4056       case Iex_Load:
4057          return expr2vbits_Load( mce, e->Iex.Load.end,
4058                                       e->Iex.Load.ty,
4059                                       e->Iex.Load.addr, 0/*addr bias*/ );
4060 
4061       case Iex_CCall:
4062          return mkLazyN( mce, e->Iex.CCall.args,
4063                               e->Iex.CCall.retty,
4064                               e->Iex.CCall.cee );
4065 
4066       case Iex_Mux0X:
4067          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
4068                                        e->Iex.Mux0X.exprX);
4069 
4070       default:
4071          VG_(printf)("\n");
4072          ppIRExpr(e);
4073          VG_(printf)("\n");
4074          VG_(tool_panic)("memcheck: expr2vbits");
4075    }
4076 }
4077 
4078 /*------------------------------------------------------------*/
4079 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
4080 /*------------------------------------------------------------*/
4081 
4082 /* Widen a value to the host word size. */
4083 
4084 static
4085 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4086 {
4087    IRType ty, tyH;
4088 
4089    /* vatom is vbits-value and as such can only have a shadow type. */
4090    tl_assert(isShadowAtom(mce,vatom));
4091 
4092    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
4093    tyH = mce->hWordTy;
4094 
4095    if (tyH == Ity_I32) {
4096       switch (ty) {
4097          case Ity_I32:
4098             return vatom;
4099          case Ity_I16:
4100             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4101          case Ity_I8:
4102             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4103          default:
4104             goto unhandled;
4105       }
4106    } else
4107    if (tyH == Ity_I64) {
4108       switch (ty) {
4109          case Ity_I32:
4110             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4111          case Ity_I16:
4112             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4113                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4114          case Ity_I8:
4115             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4116                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4117          default:
4118             goto unhandled;
4119       }
4120    } else {
4121       goto unhandled;
4122    }
4123   unhandled:
4124    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4125    VG_(tool_panic)("zwidenToHostWord");
4126 }
4127 
4128 
4129 /* Generate a shadow store.  addr is always the original address atom.
4130    You can pass in either originals or V-bits for the data atom, but
4131    obviously not both.  guard :: Ity_I1 controls whether the store
4132    really happens; NULL means it unconditionally does.  Note that
4133    guard itself is not checked for definedness; the caller of this
4134    function must do that if necessary. */
4135 
4136 static
4137 void do_shadow_Store ( MCEnv* mce,
4138                        IREndness end,
4139                        IRAtom* addr, UInt bias,
4140                        IRAtom* data, IRAtom* vdata,
4141                        IRAtom* guard )
4142 {
4143    IROp     mkAdd;
4144    IRType   ty, tyAddr;
4145    void*    helper = NULL;
4146    Char*    hname = NULL;
4147    IRConst* c;
4148 
4149    tyAddr = mce->hWordTy;
4150    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4151    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4152    tl_assert( end == Iend_LE || end == Iend_BE );
4153 
4154    if (data) {
4155       tl_assert(!vdata);
4156       tl_assert(isOriginalAtom(mce, data));
4157       tl_assert(bias == 0);
4158       vdata = expr2vbits( mce, data );
4159    } else {
4160       tl_assert(vdata);
4161    }
4162 
4163    tl_assert(isOriginalAtom(mce,addr));
4164    tl_assert(isShadowAtom(mce,vdata));
4165 
4166    if (guard) {
4167       tl_assert(isOriginalAtom(mce, guard));
4168       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4169    }
4170 
4171    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
4172 
4173    // If we're not doing undefined value checking, pretend that this value
4174    // is "all valid".  That lets Vex's optimiser remove some of the V bit
4175    // shadow computation ops that precede it.
4176    if (MC_(clo_mc_level) == 1) {
4177       switch (ty) {
4178          case Ity_V256: // V256 weirdness -- used four times
4179                         c = IRConst_V256(V_BITS32_DEFINED); break;
4180          case Ity_V128: // V128 weirdness -- used twice
4181                         c = IRConst_V128(V_BITS16_DEFINED); break;
4182          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
4183          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
4184          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
4185          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
4186          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4187       }
4188       vdata = IRExpr_Const( c );
4189    }
4190 
4191    /* First, emit a definedness test for the address.  This also sets
4192       the address (shadow) to 'defined' following the test. */
4193    complainIfUndefined( mce, addr, guard );
4194 
4195    /* Now decide which helper function to call to write the data V
4196       bits into shadow memory. */
4197    if (end == Iend_LE) {
4198       switch (ty) {
4199          case Ity_V256: /* we'll use the helper four times */
4200          case Ity_V128: /* we'll use the helper twice */
4201          case Ity_I64: helper = &MC_(helperc_STOREV64le);
4202                        hname = "MC_(helperc_STOREV64le)";
4203                        break;
4204          case Ity_I32: helper = &MC_(helperc_STOREV32le);
4205                        hname = "MC_(helperc_STOREV32le)";
4206                        break;
4207          case Ity_I16: helper = &MC_(helperc_STOREV16le);
4208                        hname = "MC_(helperc_STOREV16le)";
4209                        break;
4210          case Ity_I8:  helper = &MC_(helperc_STOREV8);
4211                        hname = "MC_(helperc_STOREV8)";
4212                        break;
4213          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4214       }
4215    } else {
4216       switch (ty) {
4217          case Ity_V128: /* we'll use the helper twice */
4218          case Ity_I64: helper = &MC_(helperc_STOREV64be);
4219                        hname = "MC_(helperc_STOREV64be)";
4220                        break;
4221          case Ity_I32: helper = &MC_(helperc_STOREV32be);
4222                        hname = "MC_(helperc_STOREV32be)";
4223                        break;
4224          case Ity_I16: helper = &MC_(helperc_STOREV16be);
4225                        hname = "MC_(helperc_STOREV16be)";
4226                        break;
4227          case Ity_I8:  helper = &MC_(helperc_STOREV8);
4228                        hname = "MC_(helperc_STOREV8)";
4229                        break;
4230          /* Note: no V256 case here, because no big-endian target that
4231             we support has 256-bit vectors. */
4232          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4233       }
4234    }
4235 
4236    if (UNLIKELY(ty == Ity_V256)) {
4237 
4238       /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4239          Q3 being the most significant lane. */
4240       /* These are the offsets of the Qs in memory. */
4241       Int     offQ0, offQ1, offQ2, offQ3;
4242 
4243       /* Various bits for constructing the 4 lane helper calls */
4244       IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
4245       IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
4246       IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4247       IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4248 
4249       if (end == Iend_LE) {
4250          offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4251       } else {
4252          offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4253       }
4254 
4255       eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4256       addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4257       vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4258       diQ0    = unsafeIRDirty_0_N(
4259                    1/*regparms*/,
4260                    hname, VG_(fnptr_to_fnentry)( helper ),
4261                    mkIRExprVec_2( addrQ0, vdataQ0 )
4262                 );
4263 
4264       eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4265       addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4266       vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4267       diQ1    = unsafeIRDirty_0_N(
4268                    1/*regparms*/,
4269                    hname, VG_(fnptr_to_fnentry)( helper ),
4270                    mkIRExprVec_2( addrQ1, vdataQ1 )
4271                 );
4272 
4273       eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4274       addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4275       vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4276       diQ2    = unsafeIRDirty_0_N(
4277                    1/*regparms*/,
4278                    hname, VG_(fnptr_to_fnentry)( helper ),
4279                    mkIRExprVec_2( addrQ2, vdataQ2 )
4280                 );
4281 
4282       eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4283       addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4284       vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4285       diQ3    = unsafeIRDirty_0_N(
4286                    1/*regparms*/,
4287                    hname, VG_(fnptr_to_fnentry)( helper ),
4288                    mkIRExprVec_2( addrQ3, vdataQ3 )
4289                 );
4290 
4291       if (guard)
4292          diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4293 
4294       setHelperAnns( mce, diQ0 );
4295       setHelperAnns( mce, diQ1 );
4296       setHelperAnns( mce, diQ2 );
4297       setHelperAnns( mce, diQ3 );
4298       stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4299       stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4300       stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4301       stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4302 
4303    }
4304    else if (UNLIKELY(ty == Ity_V128)) {
4305 
4306       /* V128-bit case */
4307       /* See comment in next clause re 64-bit regparms */
4308       /* also, need to be careful about endianness */
4309 
4310       Int     offLo64, offHi64;
4311       IRDirty *diLo64, *diHi64;
4312       IRAtom  *addrLo64, *addrHi64;
4313       IRAtom  *vdataLo64, *vdataHi64;
4314       IRAtom  *eBiasLo64, *eBiasHi64;
4315 
4316       if (end == Iend_LE) {
4317          offLo64 = 0;
4318          offHi64 = 8;
4319       } else {
4320          offLo64 = 8;
4321          offHi64 = 0;
4322       }
4323 
4324       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
4325       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4326       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
4327       diLo64    = unsafeIRDirty_0_N(
4328                      1/*regparms*/,
4329                      hname, VG_(fnptr_to_fnentry)( helper ),
4330                      mkIRExprVec_2( addrLo64, vdataLo64 )
4331                   );
4332       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
4333       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4334       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
4335       diHi64    = unsafeIRDirty_0_N(
4336                      1/*regparms*/,
4337                      hname, VG_(fnptr_to_fnentry)( helper ),
4338                      mkIRExprVec_2( addrHi64, vdataHi64 )
4339                   );
4340       if (guard) diLo64->guard = guard;
4341       if (guard) diHi64->guard = guard;
4342       setHelperAnns( mce, diLo64 );
4343       setHelperAnns( mce, diHi64 );
4344       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4345       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
4346 
4347    } else {
4348 
4349       IRDirty *di;
4350       IRAtom  *addrAct;
4351 
4352       /* 8/16/32/64-bit cases */
4353       /* Generate the actual address into addrAct. */
4354       if (bias == 0) {
4355          addrAct = addr;
4356       } else {
4357          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4358          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
4359       }
4360 
4361       if (ty == Ity_I64) {
4362          /* We can't do this with regparm 2 on 32-bit platforms, since
4363             the back ends aren't clever enough to handle 64-bit
4364             regparm args.  Therefore be different. */
4365          di = unsafeIRDirty_0_N(
4366                  1/*regparms*/,
4367                  hname, VG_(fnptr_to_fnentry)( helper ),
4368                  mkIRExprVec_2( addrAct, vdata )
4369               );
4370       } else {
4371          di = unsafeIRDirty_0_N(
4372                  2/*regparms*/,
4373                  hname, VG_(fnptr_to_fnentry)( helper ),
4374                  mkIRExprVec_2( addrAct,
4375                                 zwidenToHostWord( mce, vdata ))
4376               );
4377       }
4378       if (guard) di->guard = guard;
4379       setHelperAnns( mce, di );
4380       stmt( 'V', mce, IRStmt_Dirty(di) );
4381    }
4382 
4383 }
4384 
4385 
4386 /* Do lazy pessimistic propagation through a dirty helper call, by
4387    looking at the annotations on it.  This is the most complex part of
4388    Memcheck. */
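/* In outline (a rough summary of do_shadow_Dirty below, not a precise
   specification): every input to the call -- each unmasked argument,
   each piece of guest state read, and each byte range of memory read --
   is PCast-ed down and UifU-ed into a single I32 V-value 'curr'.  That
   pessimistic summary is then PCast-ed back up to the appropriate type
   and written to every output: the destination temporary, any guest
   state written, and any memory written. */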
4389 
4390 static IRType szToITy ( Int n )
4391 {
4392    switch (n) {
4393       case 1: return Ity_I8;
4394       case 2: return Ity_I16;
4395       case 4: return Ity_I32;
4396       case 8: return Ity_I64;
4397       default: VG_(tool_panic)("szToITy(memcheck)");
4398    }
4399 }
4400 
4401 static
4402 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4403 {
4404    Int       i, k, n, toDo, gSz, gOff;
4405    IRAtom    *src, *here, *curr;
4406    IRType    tySrc, tyDst;
4407    IRTemp    dst;
4408    IREndness end;
4409 
4410    /* What's the native endianness?  We need to know this. */
4411 #  if defined(VG_BIGENDIAN)
4412    end = Iend_BE;
4413 #  elif defined(VG_LITTLEENDIAN)
4414    end = Iend_LE;
4415 #  else
4416 #    error "Unknown endianness"
4417 #  endif
4418 
4419    /* First check the guard. */
4420    complainIfUndefined(mce, d->guard, NULL);
4421 
4422    /* Now round up all inputs and PCast over them. */
4423    curr = definedOfType(Ity_I32);
4424 
4425    /* Inputs: unmasked args
4426       Note: arguments are evaluated REGARDLESS of the guard expression */
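   /* (Here mcx_mask is taken to mean: bit i set => do not check the
      definedness of argument i; the loop below uses it exactly that
      way.) */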
4427    for (i = 0; d->args[i]; i++) {
4428       if (d->cee->mcx_mask & (1<<i)) {
4429          /* ignore this arg */
4430       } else {
4431          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4432          curr = mkUifU32(mce, here, curr);
4433       }
4434    }
4435 
4436    /* Inputs: guest state that we read. */
4437    for (i = 0; i < d->nFxState; i++) {
4438       tl_assert(d->fxState[i].fx != Ifx_None);
4439       if (d->fxState[i].fx == Ifx_Write)
4440          continue;
4441 
4442       /* Enumerate the described state segments */
4443       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4444          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4445          gSz  = d->fxState[i].size;
4446 
4447          /* Ignore any sections marked as 'always defined'. */
4448          if (isAlwaysDefd(mce, gOff, gSz)) {
4449             if (0)
4450             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4451                         gOff, gSz);
4452             continue;
4453          }
4454 
4455          /* This state element is read or modified.  So we need to
4456             consider it.  If larger than 8 bytes, deal with it in
4457             8-byte chunks. */
4458          while (True) {
4459             tl_assert(gSz >= 0);
4460             if (gSz == 0) break;
4461             n = gSz <= 8 ? gSz : 8;
4462             /* update 'curr' with UifU of the state slice
4463                gOff .. gOff+n-1 */
4464             tySrc = szToITy( n );
4465 
4466             /* Observe the guard expression. If it is false use an
4467                all-bits-defined bit pattern */
4468             IRAtom *cond, *iffalse, *iftrue;
4469 
4470             cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
4471             iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
4472             iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
4473             src     = assignNew('V', mce, tySrc,
4474                                 IRExpr_Mux0X(cond, iffalse, iftrue));
4475 
4476             here = mkPCastTo( mce, Ity_I32, src );
4477             curr = mkUifU32(mce, here, curr);
4478             gSz -= n;
4479             gOff += n;
4480          }
4481       }
4482    }
4483 
4484    /* Inputs: memory.  First set up some info needed regardless of
4485       whether we're doing reads or writes. */
4486 
4487    if (d->mFx != Ifx_None) {
4488       /* Because we may do multiple shadow loads/stores from the same
4489          base address, it's best to do a single test of its
4490          definedness right now.  Post-instrumentation optimisation
4491          should remove all but this test. */
4492       IRType tyAddr;
4493       tl_assert(d->mAddr);
4494       complainIfUndefined(mce, d->mAddr, d->guard);
4495 
4496       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4497       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4498       tl_assert(tyAddr == mce->hWordTy); /* not really right */
4499    }
4500 
4501    /* Deal with memory inputs (reads or modifies) */
4502    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4503       toDo   = d->mSize;
4504       /* chew off 32-bit chunks.  We don't care about the endianness
4505          since it's all going to be condensed down to a single bit,
4506          but nevertheless choose an endianness which is hopefully
4507          native to the platform. */
4508       while (toDo >= 4) {
4509          here = mkPCastTo(
4510                    mce, Ity_I32,
4511                    expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
4512                                              d->mSize - toDo, d->guard )
4513                 );
4514          curr = mkUifU32(mce, here, curr);
4515          toDo -= 4;
4516       }
4517       /* chew off 16-bit chunks */
4518       while (toDo >= 2) {
4519          here = mkPCastTo(
4520                    mce, Ity_I32,
4521                    expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
4522                                              d->mSize - toDo, d->guard )
4523                 );
4524          curr = mkUifU32(mce, here, curr);
4525          toDo -= 2;
4526       }
4527       /* chew off the remaining 8-bit chunk, if any */
4528       if (toDo == 1) {
4529          here = mkPCastTo(
4530                    mce, Ity_I32,
4531                    expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
4532                                              d->mSize - toDo, d->guard )
4533                 );
4534          curr = mkUifU32(mce, here, curr);
4535          toDo -= 1;
4536       }
4537       tl_assert(toDo == 0);
4538    }
4539 
4540    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
4541       all the inputs to the helper.  Now we need to re-distribute the
4542       results to all destinations. */
4543 
4544    /* Outputs: the destination temporary, if there is one. */
4545    if (d->tmp != IRTemp_INVALID) {
4546       dst   = findShadowTmpV(mce, d->tmp);
4547       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4548       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4549    }
4550 
4551    /* Outputs: guest state that we write or modify. */
4552    for (i = 0; i < d->nFxState; i++) {
4553       tl_assert(d->fxState[i].fx != Ifx_None);
4554       if (d->fxState[i].fx == Ifx_Read)
4555          continue;
4556 
4557       /* Enumerate the described state segments */
4558       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4559          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4560          gSz  = d->fxState[i].size;
4561 
4562          /* Ignore any sections marked as 'always defined'. */
4563          if (isAlwaysDefd(mce, gOff, gSz))
4564             continue;
4565 
4566          /* This state element is written or modified.  So we need to
4567             consider it.  If larger than 8 bytes, deal with it in
4568             8-byte chunks. */
4569          while (True) {
4570             tl_assert(gSz >= 0);
4571             if (gSz == 0) break;
4572             n = gSz <= 8 ? gSz : 8;
4573             /* Write suitably-casted 'curr' to the state slice
4574                gOff .. gOff+n-1 */
4575             tyDst = szToITy( n );
4576             do_shadow_PUT( mce, gOff,
4577                                 NULL, /* original atom */
4578                                 mkPCastTo( mce, tyDst, curr ), d->guard );
4579             gSz -= n;
4580             gOff += n;
4581          }
4582       }
4583    }
4584 
4585    /* Outputs: memory that we write or modify.  Same comments about
4586       endianness as above apply. */
4587    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4588       toDo   = d->mSize;
4589       /* chew off 32-bit chunks */
4590       while (toDo >= 4) {
4591          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4592                           NULL, /* original data */
4593                           mkPCastTo( mce, Ity_I32, curr ),
4594                           d->guard );
4595          toDo -= 4;
4596       }
4597       /* chew off 16-bit chunks */
4598       while (toDo >= 2) {
4599          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4600                           NULL, /* original data */
4601                           mkPCastTo( mce, Ity_I16, curr ),
4602                           d->guard );
4603          toDo -= 2;
4604       }
4605       /* chew off the remaining 8-bit chunk, if any */
4606       if (toDo == 1) {
4607          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4608                           NULL, /* original data */
4609                           mkPCastTo( mce, Ity_I8, curr ),
4610                           d->guard );
4611          toDo -= 1;
4612       }
4613       tl_assert(toDo == 0);
4614    }
4615 
4616 }
4617 
4618 
4619 /* We have an ABI hint telling us that [base .. base+len-1] is to
4620    become undefined ("writable").  Generate code to call a helper to
4621    notify the A/V bit machinery of this fact.
4622 
4623    We call
4624    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4625                                                     Addr nia );
4626 */
4627 static
4628 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4629 {
4630    IRDirty* di;
4631    /* Minor optimisation: if not doing origin tracking, ignore the
4632       supplied nia and pass zero instead.  This is on the basis that
4633       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4634       almost always generate a shorter instruction to put zero into a
4635       register than any other value. */
4636    if (MC_(clo_mc_level) < 3)
4637       nia = mkIRExpr_HWord(0);
4638 
4639    di = unsafeIRDirty_0_N(
4640            0/*regparms*/,
4641            "MC_(helperc_MAKE_STACK_UNINIT)",
4642            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4643            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4644         );
4645    stmt( 'V', mce, IRStmt_Dirty(di) );
4646 }
4647 
4648 
4649 /* ------ Dealing with IRCAS (big and complex) ------ */
4650 
4651 /* FWDS */
4652 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4653                              IRAtom* baseaddr, Int offset );
4654 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4655 static void    gen_store_b ( MCEnv* mce, Int szB,
4656                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
4657                              IRAtom* guard );
4658 
4659 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4660 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4661 
4662 
4663 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4664    IRExpr.Consts, else this asserts.  If they are both Consts, it
4665    doesn't do anything.  So that just leaves the RdTmp case.
4666 
4667    In which case: this assigns the shadow value SHADOW to the IR
4668    shadow temporary associated with ORIG.  That is, ORIG, being an
4669    original temporary, will have a shadow temporary associated with
4670    it.  However, in the case envisaged here, there will so far have
4671    been no IR emitted to actually write a shadow value into that
4672    temporary.  What this routine does is to (emit IR to) copy the
4673    value in SHADOW into said temporary, so that after this call,
4674    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4675    value in SHADOW.
4676 
4677    Point is to allow callers to compute "by hand" a shadow value for
4678    ORIG, and force it to be associated with ORIG.
4679 
4680    How do we know that the shadow associated with ORIG has not so far
4681    been assigned to?  Well, we don't per se know that, but supposing
4682    it had.  Then this routine would create a second assignment to it,
4683    and later the IR sanity checker would barf.  But that never
4684    happens.  QED.
4685 */
4686 static void bind_shadow_tmp_to_orig ( UChar how,
4687                                       MCEnv* mce,
4688                                       IRAtom* orig, IRAtom* shadow )
4689 {
4690    tl_assert(isOriginalAtom(mce, orig));
4691    tl_assert(isShadowAtom(mce, shadow));
4692    switch (orig->tag) {
4693       case Iex_Const:
4694          tl_assert(shadow->tag == Iex_Const);
4695          break;
4696       case Iex_RdTmp:
4697          tl_assert(shadow->tag == Iex_RdTmp);
4698          if (how == 'V') {
4699             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4700                    shadow);
4701          } else {
4702             tl_assert(how == 'B');
4703             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4704                    shadow);
4705          }
4706          break;
4707       default:
4708          tl_assert(0);
4709    }
4710 }
4711 
4712 
4713 static
4714 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4715 {
4716    /* Scheme is (both single- and double- cases):
4717 
4718       1. fetch data#,dataB (the proposed new value)
4719 
4720       2. fetch expd#,expdB (what we expect to see at the address)
4721 
4722       3. check definedness of address
4723 
4724       4. load old#,oldB from shadow memory; this also checks
4725          addressibility of the address
4726 
4727       5. the CAS itself
4728 
4729       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4730 
4731       7. if "expected == old" (as computed by (6))
4732             store data#,dataB to shadow memory
4733 
4734       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4735       'data' but 7 stores 'data#'.  Hence it is possible for the
4736       shadow data to be incorrectly checked and/or updated:
4737 
4738       * 7 is at least gated correctly, since the 'expected == old'
4739         condition is derived from outputs of 5.  However, the shadow
4740         write could happen too late: imagine after 5 we are
4741         descheduled, a different thread runs, writes a different
4742         (shadow) value at the address, and then we resume, hence
4743         overwriting the shadow value written by the other thread.
4744 
4745       Because the original memory access is atomic, there's no way to
4746       make both the original and shadow accesses into a single atomic
4747       thing, hence this is unavoidable.
4748 
4749       At least as Valgrind stands, I don't think it's a problem, since
4750       we're single threaded *and* we guarantee that there are no
4751       context switches during the execution of any specific superblock
4752       -- context switches can only happen at superblock boundaries.
4753 
4754       If Valgrind ever becomes MT in the future, then it might be more
4755       of a problem.  A possible kludge would be to artificially
4756       associate with the location, a lock, which we must acquire and
4757       release around the transaction as a whole.  Hmm, that probably
4758       wouldn't work properly since it only guards us against other
4759       threads doing CASs on the same location, not against other
4760       threads doing normal reads and writes.
4761 
4762       ------------------------------------------------------------
4763 
4764       COMMENT_ON_CasCmpEQ:
4765 
4766       Note two things.  Firstly, in the sequence above, we compute
4767       "expected == old", but we don't check definedness of it.  Why
4768       not?  Also, the x86 and amd64 front ends use
4769       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4770       determination (expected == old ?) for themselves, and we also
4771       don't check definedness for those primops; we just say that the
4772       result is defined.  Why?  Details follow.
4773 
4774       x86/amd64 contains various forms of locked insns:
4775       * lock prefix before all basic arithmetic insns;
4776         eg lock xorl %reg1,(%reg2)
4777       * atomic exchange reg-mem
4778       * compare-and-swaps
4779 
4780       Rather than attempt to represent them all, which would be a
4781       royal PITA, I used a result from Maurice Herlihy
4782       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4783       demonstrates that compare-and-swap is a primitive more general
4784       than the other two, and so can be used to represent all of them.
4785       So the translation scheme for (eg) lock incl (%reg) is as
4786       follows:
4787 
4788         again:
4789          old = * %reg
4790          new = old + 1
4791          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4792 
4793       The "atomically" is the CAS bit.  The scheme is always the same:
4794       get old value from memory, compute new value, atomically stuff
4795       new value back in memory iff the old value has not changed (iow,
4796       no other thread modified it in the meantime).  If it has changed
4797       then we've been out-raced and we have to start over.
4798 
4799       Now that's all very neat, but it has the bad side effect of
4800       introducing an explicit equality test into the translation.
4801       Consider the behaviour of said code on a memory location which
4802       is uninitialised.  We will wind up doing a comparison on
4803       uninitialised data, and mc duly complains.
4804 
4805       What's difficult about this is, the common case is that the
4806       location is uncontended, and so we're usually comparing the same
4807       value (* %reg) with itself.  So we shouldn't complain even if it
4808       is undefined.  But mc doesn't know that.
4809 
4810       My solution is to mark the == in the IR specially, so as to tell
4811       mc that it almost certainly compares a value with itself, and we
4812       should just regard the result as always defined.  Rather than
4813       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4814       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4815 
4816       So there's always the question of, can this give a false
4817       negative?  eg, imagine that initially, * %reg is defined; and we
4818       read that; but then in the gap between the read and the CAS, a
4819       different thread writes an undefined (and different) value at
4820       the location.  Then the CAS in this thread will fail and we will
4821       go back to "again:", but without knowing that the trip back
4822       there was based on an undefined comparison.  No matter; at least
4823       the other thread won the race and the location is correctly
4824       marked as undefined.  What if it wrote an uninitialised version
4825       of the same value that was there originally, though?
4826 
4827       etc etc.  Seems like there's a small corner case in which we
4828       might lose the fact that something's defined -- we're out-raced
4829       in between the "old = * reg" and the "atomically {", _and_ the
4830       other thread is writing in an undefined version of what's
4831       already there.  Well, that seems pretty unlikely.
4832 
4833       ---
4834 
4835       If we ever need to reinstate it .. code which generates a
4836       definedness test for "expected == old" was removed at r10432 of
4837       this file.
4838    */
4839    if (cas->oldHi == IRTemp_INVALID) {
4840       do_shadow_CAS_single( mce, cas );
4841    } else {
4842       do_shadow_CAS_double( mce, cas );
4843    }
4844 }
4845 
4846 
4847 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4848 {
4849    IRAtom *vdataLo = NULL, *bdataLo = NULL;
4850    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4851    IRAtom *voldLo  = NULL, *boldLo  = NULL;
4852    IRAtom *expd_eq_old = NULL;
4853    IROp   opCasCmpEQ;
4854    Int    elemSzB;
4855    IRType elemTy;
4856    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4857 
4858    /* single CAS */
4859    tl_assert(cas->oldHi == IRTemp_INVALID);
4860    tl_assert(cas->expdHi == NULL);
4861    tl_assert(cas->dataHi == NULL);
4862 
4863    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4864    switch (elemTy) {
4865       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4866       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4867       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4868       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4869       default: tl_assert(0); /* IR defn disallows any other types */
4870    }
4871 
4872    /* 1. fetch data# (the proposed new value) */
4873    tl_assert(isOriginalAtom(mce, cas->dataLo));
4874    vdataLo
4875       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4876    tl_assert(isShadowAtom(mce, vdataLo));
4877    if (otrak) {
4878       bdataLo
4879          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4880       tl_assert(isShadowAtom(mce, bdataLo));
4881    }
4882 
4883    /* 2. fetch expected# (what we expect to see at the address) */
4884    tl_assert(isOriginalAtom(mce, cas->expdLo));
4885    vexpdLo
4886       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4887    tl_assert(isShadowAtom(mce, vexpdLo));
4888    if (otrak) {
4889       bexpdLo
4890          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4891       tl_assert(isShadowAtom(mce, bexpdLo));
4892    }
4893 
4894    /* 3. check definedness of address */
4895    /* 4. fetch old# from shadow memory; this also checks
4896          addressability of the address */
4897    voldLo
4898       = assignNew(
4899            'V', mce, elemTy,
4900            expr2vbits_Load(
4901               mce,
4902               cas->end, elemTy, cas->addr, 0/*Addr bias*/
4903         ));
4904    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4905    if (otrak) {
4906       boldLo
4907          = assignNew('B', mce, Ity_I32,
4908                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4909       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4910    }
4911 
4912    /* 5. the CAS itself */
4913    stmt( 'C', mce, IRStmt_CAS(cas) );
4914 
4915    /* 6. compute "expected == old" */
4916    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
4917    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4918       tree, but it's not copied from the input block. */
4919    expd_eq_old
4920       = assignNew('C', mce, Ity_I1,
4921                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4922 
4923    /* 7. if "expected == old"
4924             store data# to shadow memory */
4925    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4926                     NULL/*data*/, vdataLo/*vdata*/,
4927                     expd_eq_old/*guard for store*/ );
4928    if (otrak) {
4929       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4930                    bdataLo/*bdata*/,
4931                    expd_eq_old/*guard for store*/ );
4932    }
4933 }
4934 
4935 
4936 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4937 {
4938    IRAtom *vdataHi = NULL, *bdataHi = NULL;
4939    IRAtom *vdataLo = NULL, *bdataLo = NULL;
4940    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4941    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4942    IRAtom *voldHi  = NULL, *boldHi  = NULL;
4943    IRAtom *voldLo  = NULL, *boldLo  = NULL;
4944    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4945    IRAtom *expd_eq_old = NULL, *zero = NULL;
4946    IROp   opCasCmpEQ, opOr, opXor;
4947    Int    elemSzB, memOffsLo, memOffsHi;
4948    IRType elemTy;
4949    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4950 
4951    /* double CAS */
4952    tl_assert(cas->oldHi != IRTemp_INVALID);
4953    tl_assert(cas->expdHi != NULL);
4954    tl_assert(cas->dataHi != NULL);
4955 
4956    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4957    switch (elemTy) {
4958       case Ity_I8:
4959          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4960          elemSzB = 1; zero = mkU8(0);
4961          break;
4962       case Ity_I16:
4963          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4964          elemSzB = 2; zero = mkU16(0);
4965          break;
4966       case Ity_I32:
4967          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4968          elemSzB = 4; zero = mkU32(0);
4969          break;
4970       case Ity_I64:
4971          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4972          elemSzB = 8; zero = mkU64(0);
4973          break;
4974       default:
4975          tl_assert(0); /* IR defn disallows any other types */
4976    }
4977 
4978    /* 1. fetch data# (the proposed new value) */
4979    tl_assert(isOriginalAtom(mce, cas->dataHi));
4980    tl_assert(isOriginalAtom(mce, cas->dataLo));
4981    vdataHi
4982       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4983    vdataLo
4984       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4985    tl_assert(isShadowAtom(mce, vdataHi));
4986    tl_assert(isShadowAtom(mce, vdataLo));
4987    if (otrak) {
4988       bdataHi
4989          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4990       bdataLo
4991          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4992       tl_assert(isShadowAtom(mce, bdataHi));
4993       tl_assert(isShadowAtom(mce, bdataLo));
4994    }
4995 
4996    /* 2. fetch expected# (what we expect to see at the address) */
4997    tl_assert(isOriginalAtom(mce, cas->expdHi));
4998    tl_assert(isOriginalAtom(mce, cas->expdLo));
4999    vexpdHi
5000       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5001    vexpdLo
5002       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5003    tl_assert(isShadowAtom(mce, vexpdHi));
5004    tl_assert(isShadowAtom(mce, vexpdLo));
5005    if (otrak) {
5006       bexpdHi
5007          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5008       bexpdLo
5009          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5010       tl_assert(isShadowAtom(mce, bexpdHi));
5011       tl_assert(isShadowAtom(mce, bexpdLo));
5012    }
5013 
5014    /* 3. check definedness of address */
5015    /* 4. fetch old# from shadow memory; this also checks
5016          addressability of the address */
5017    if (cas->end == Iend_LE) {
5018       memOffsLo = 0;
5019       memOffsHi = elemSzB;
5020    } else {
5021       tl_assert(cas->end == Iend_BE);
5022       memOffsLo = elemSzB;
5023       memOffsHi = 0;
5024    }
5025    voldHi
5026       = assignNew(
5027            'V', mce, elemTy,
5028            expr2vbits_Load(
5029               mce,
5030               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
5031         ));
5032    voldLo
5033       = assignNew(
5034            'V', mce, elemTy,
5035            expr2vbits_Load(
5036               mce,
5037               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
5038         ));
5039    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5040    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5041    if (otrak) {
5042       boldHi
5043          = assignNew('B', mce, Ity_I32,
5044                      gen_load_b(mce, elemSzB, cas->addr,
5045                                 memOffsHi/*addr bias*/));
5046       boldLo
5047          = assignNew('B', mce, Ity_I32,
5048                      gen_load_b(mce, elemSzB, cas->addr,
5049                                 memOffsLo/*addr bias*/));
5050       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5051       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5052    }
5053 
5054    /* 5. the CAS itself */
5055    stmt( 'C', mce, IRStmt_CAS(cas) );
5056 
5057    /* 6. compute "expected == old" */
5058    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5059    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5060       tree, but it's not copied from the input block. */
5061    /*
5062       xHi = oldHi ^ expdHi;
5063       xLo = oldLo ^ expdLo;
5064       xHL = xHi | xLo;
5065       expd_eq_old = xHL == 0;
5066    */
5067    xHi = assignNew('C', mce, elemTy,
5068                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5069    xLo = assignNew('C', mce, elemTy,
5070                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5071    xHL = assignNew('C', mce, elemTy,
5072                    binop(opOr, xHi, xLo));
5073    expd_eq_old
5074       = assignNew('C', mce, Ity_I1,
5075                   binop(opCasCmpEQ, xHL, zero));
5076 
5077    /* 7. if "expected == old"
5078             store data# to shadow memory */
5079    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5080                     NULL/*data*/, vdataHi/*vdata*/,
5081                     expd_eq_old/*guard for store*/ );
5082    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5083                     NULL/*data*/, vdataLo/*vdata*/,
5084                     expd_eq_old/*guard for store*/ );
5085    if (otrak) {
5086       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5087                    bdataHi/*bdata*/,
5088                    expd_eq_old/*guard for store*/ );
5089       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5090                    bdataLo/*bdata*/,
5091                    expd_eq_old/*guard for store*/ );
5092    }
5093 }
5094 
5095 
5096 /* ------ Dealing with LL/SC (not difficult) ------ */
5097 
5098 static void do_shadow_LLSC ( MCEnv*    mce,
5099                              IREndness stEnd,
5100                              IRTemp    stResult,
5101                              IRExpr*   stAddr,
5102                              IRExpr*   stStoredata )
5103 {
5104    /* In short: treat a load-linked like a normal load followed by an
5105       assignment of the loaded (shadow) data to the result temporary.
5106       Treat a store-conditional like a normal store, and mark the
5107       result temporary as defined. */
5108    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
5109    IRTemp resTmp = findShadowTmpV(mce, stResult);
5110 
5111    tl_assert(isIRAtom(stAddr));
5112    if (stStoredata)
5113       tl_assert(isIRAtom(stStoredata));
5114 
5115    if (stStoredata == NULL) {
5116       /* Load Linked */
5117       /* Just treat this as a normal load, followed by an assignment of
5118          the value to .result. */
5119       /* Stay sane */
5120       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5121                 || resTy == Ity_I16 || resTy == Ity_I8);
5122       assign( 'V', mce, resTmp,
5123                    expr2vbits_Load(
5124                       mce, stEnd, resTy, stAddr, 0/*addr bias*/));
5125    } else {
5126       /* Store Conditional */
5127       /* Stay sane */
5128       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5129                                    stStoredata);
5130       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5131                 || dataTy == Ity_I16 || dataTy == Ity_I8);
5132       do_shadow_Store( mce, stEnd,
5133                             stAddr, 0/* addr bias */,
5134                             stStoredata,
5135                             NULL /* shadow data */,
5136                             NULL/*guard*/ );
5137       /* This is a store conditional, so it writes to .result a value
5138          indicating whether or not the store succeeded.  Just claim
5139          this value is always defined.  In the PowerPC interpretation
5140          of store-conditional, definedness of the success indication
5141          depends on whether the address of the store matches the
5142          reservation address.  But we can't tell that here (and
5143          anyway, we're not being PowerPC-specific).  At least we are
5144          guaranteed that the definedness of the store address, and its
5145          addressability, will be checked as per normal.  So it seems
5146          pretty safe to just say that the success indication is always
5147          defined.
5148 
5149          In schemeS, for origin tracking, we must correspondingly set
5150          a no-origin value for the origin shadow of .result.
5151       */
5152       tl_assert(resTy == Ity_I1);
5153       assign( 'V', mce, resTmp, definedOfType(resTy) );
5154    }
5155 }
5156 
5157 
5158 /*------------------------------------------------------------*/
5159 /*--- Memcheck main                                        ---*/
5160 /*------------------------------------------------------------*/
5161 
5162 static void schemeS ( MCEnv* mce, IRStmt* st );
5163 
5164 static Bool isBogusAtom ( IRAtom* at )
5165 {
5166    ULong n = 0;
5167    IRConst* con;
5168    tl_assert(isIRAtom(at));
5169    if (at->tag == Iex_RdTmp)
5170       return False;
5171    tl_assert(at->tag == Iex_Const);
5172    con = at->Iex.Const.con;
5173    switch (con->tag) {
5174       case Ico_U1:   return False;
5175       case Ico_U8:   n = (ULong)con->Ico.U8; break;
5176       case Ico_U16:  n = (ULong)con->Ico.U16; break;
5177       case Ico_U32:  n = (ULong)con->Ico.U32; break;
5178       case Ico_U64:  n = (ULong)con->Ico.U64; break;
5179       case Ico_F64:  return False;
5180       case Ico_F32i: return False;
5181       case Ico_F64i: return False;
5182       case Ico_V128: return False;
5183       default: ppIRExpr(at); tl_assert(0);
5184    }
5185    /* VG_(printf)("%llx\n", n); */
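   /* These literals plausibly come from word-at-a-time string scanning
      code (eg the classic has-a-zero-byte test
      (x - 0x01010101) & ~x & 0x80808080, noting 0xFEFEFEFF == -0x01010101
      in 32 bits), which deliberately loads partially undefined words.
      Seeing such a literal is used as a hint to switch on the more
      expensive, more precise instrumentation -- see the bogusLiterals
      handling in MC_(instrument) below. */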
5186    return (/*32*/    n == 0xFEFEFEFFULL
5187            /*32*/ || n == 0x80808080ULL
5188            /*32*/ || n == 0x7F7F7F7FULL
5189            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
5190            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
5191            /*64*/ || n == 0x0000000000008080ULL
5192            /*64*/ || n == 0x8080808080808080ULL
5193            /*64*/ || n == 0x0101010101010101ULL
5194           );
5195 }
5196 
5197 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5198 {
5199    Int      i;
5200    IRExpr*  e;
5201    IRDirty* d;
5202    IRCAS*   cas;
5203    switch (st->tag) {
5204       case Ist_WrTmp:
5205          e = st->Ist.WrTmp.data;
5206          switch (e->tag) {
5207             case Iex_Get:
5208             case Iex_RdTmp:
5209                return False;
5210             case Iex_Const:
5211                return isBogusAtom(e);
5212             case Iex_Unop:
5213                return isBogusAtom(e->Iex.Unop.arg);
5214             case Iex_GetI:
5215                return isBogusAtom(e->Iex.GetI.ix);
5216             case Iex_Binop:
5217                return isBogusAtom(e->Iex.Binop.arg1)
5218                       || isBogusAtom(e->Iex.Binop.arg2);
5219             case Iex_Triop:
5220                return isBogusAtom(e->Iex.Triop.details->arg1)
5221                       || isBogusAtom(e->Iex.Triop.details->arg2)
5222                       || isBogusAtom(e->Iex.Triop.details->arg3);
5223             case Iex_Qop:
5224                return isBogusAtom(e->Iex.Qop.details->arg1)
5225                       || isBogusAtom(e->Iex.Qop.details->arg2)
5226                       || isBogusAtom(e->Iex.Qop.details->arg3)
5227                       || isBogusAtom(e->Iex.Qop.details->arg4);
5228             case Iex_Mux0X:
5229                return isBogusAtom(e->Iex.Mux0X.cond)
5230                       || isBogusAtom(e->Iex.Mux0X.expr0)
5231                       || isBogusAtom(e->Iex.Mux0X.exprX);
5232             case Iex_Load:
5233                return isBogusAtom(e->Iex.Load.addr);
5234             case Iex_CCall:
5235                for (i = 0; e->Iex.CCall.args[i]; i++)
5236                   if (isBogusAtom(e->Iex.CCall.args[i]))
5237                      return True;
5238                return False;
5239             default:
5240                goto unhandled;
5241          }
5242       case Ist_Dirty:
5243          d = st->Ist.Dirty.details;
5244          for (i = 0; d->args[i]; i++)
5245             if (isBogusAtom(d->args[i]))
5246                return True;
5247          if (d->guard && isBogusAtom(d->guard))
5248             return True;
5249          if (d->mAddr && isBogusAtom(d->mAddr))
5250             return True;
5251          return False;
5252       case Ist_Put:
5253          return isBogusAtom(st->Ist.Put.data);
5254       case Ist_PutI:
5255          return isBogusAtom(st->Ist.PutI.details->ix)
5256                 || isBogusAtom(st->Ist.PutI.details->data);
5257       case Ist_Store:
5258          return isBogusAtom(st->Ist.Store.addr)
5259                 || isBogusAtom(st->Ist.Store.data);
5260       case Ist_Exit:
5261          return isBogusAtom(st->Ist.Exit.guard);
5262       case Ist_AbiHint:
5263          return isBogusAtom(st->Ist.AbiHint.base)
5264                 || isBogusAtom(st->Ist.AbiHint.nia);
5265       case Ist_NoOp:
5266       case Ist_IMark:
5267       case Ist_MBE:
5268          return False;
5269       case Ist_CAS:
5270          cas = st->Ist.CAS.details;
5271          return isBogusAtom(cas->addr)
5272                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5273                 || isBogusAtom(cas->expdLo)
5274                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5275                 || isBogusAtom(cas->dataLo);
5276       case Ist_LLSC:
5277          return isBogusAtom(st->Ist.LLSC.addr)
5278                 || (st->Ist.LLSC.storedata
5279                        ? isBogusAtom(st->Ist.LLSC.storedata)
5280                        : False);
5281       default:
5282       unhandled:
5283          ppIRStmt(st);
5284          VG_(tool_panic)("hasBogusLiterals");
5285    }
5286 }
5287 
5288 
5289 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
5290                         IRSB* sb_in,
5291                         VexGuestLayout* layout,
5292                         VexGuestExtents* vge,
5293                         IRType gWordTy, IRType hWordTy )
5294 {
5295    Bool    verboze = 0||False;
5296    Bool    bogus;
5297    Int     i, j, first_stmt;
5298    IRStmt* st;
5299    MCEnv   mce;
5300    IRSB*   sb_out;
5301 
5302    if (gWordTy != hWordTy) {
5303       /* We don't currently support this case. */
5304       VG_(tool_panic)("host/guest word size mismatch");
5305    }
5306 
5307    /* Check we're not completely nuts */
5308    tl_assert(sizeof(UWord)  == sizeof(void*));
5309    tl_assert(sizeof(Word)   == sizeof(void*));
5310    tl_assert(sizeof(Addr)   == sizeof(void*));
5311    tl_assert(sizeof(ULong)  == 8);
5312    tl_assert(sizeof(Long)   == 8);
5313    tl_assert(sizeof(Addr64) == 8);
5314    tl_assert(sizeof(UInt)   == 4);
5315    tl_assert(sizeof(Int)    == 4);
5316 
5317    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
5318 
5319    /* Set up SB */
5320    sb_out = deepCopyIRSBExceptStmts(sb_in);
5321 
5322    /* Set up the running environment.  Both .sb and .tmpMap are
5323       modified as we go along.  Note that tmps are added to both
5324       .sb->tyenv and .tmpMap together, so the valid index-set for
5325       those two arrays should always be identical. */
5326    VG_(memset)(&mce, 0, sizeof(mce));
5327    mce.sb             = sb_out;
5328    mce.trace          = verboze;
5329    mce.layout         = layout;
5330    mce.hWordTy        = hWordTy;
5331    mce.bogusLiterals  = False;
5332 
5333    /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5334       Darwin.  10.7 is mostly built with LLVM, which uses these for
5335       bitfield inserts, and we get a lot of false errors if the cheap
5336       interpretation is used, alas.  Could solve this much better if
5337       we knew which of such adds came from x86/amd64 LEA instructions,
5338       since these are the only ones really needing the expensive
5339       interpretation, but that would require some way to tag them in
5340       the _toIR.c front ends, which is a lot of faffing around.  So
5341       for now just use the slow and blunt-instrument solution. */
5342    mce.useLLVMworkarounds = False;
5343 #  if defined(VGO_darwin)
5344    mce.useLLVMworkarounds = True;
5345 #  endif
5346 
5347    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5348                             sizeof(TempMapEnt));
5349    for (i = 0; i < sb_in->tyenv->types_used; i++) {
5350       TempMapEnt ent;
5351       ent.kind    = Orig;
5352       ent.shadowV = IRTemp_INVALID;
5353       ent.shadowB = IRTemp_INVALID;
5354       VG_(addToXA)( mce.tmpMap, &ent );
5355    }
5356    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
5357 
5358    /* Make a preliminary inspection of the statements, to see if there
5359       are any dodgy-looking literals.  If there are, we generate
5360       extra-detailed (hence extra-expensive) instrumentation in
5361       places.  Scan the whole bb even if dodginess is found earlier,
5362       so that the flatness assertion is applied to all stmts. */
5363 
5364    bogus = False;
5365 
5366    for (i = 0; i < sb_in->stmts_used; i++) {
5367 
5368       st = sb_in->stmts[i];
5369       tl_assert(st);
5370       tl_assert(isFlatIRStmt(st));
5371 
5372       if (!bogus) {
5373          bogus = checkForBogusLiterals(st);
5374          if (0 && bogus) {
5375             VG_(printf)("bogus: ");
5376             ppIRStmt(st);
5377             VG_(printf)("\n");
5378          }
5379       }
5380 
5381    }
5382 
5383    mce.bogusLiterals = bogus;
5384 
5385    /* Copy verbatim any IR preamble preceding the first IMark */
5386 
5387    tl_assert(mce.sb == sb_out);
5388    tl_assert(mce.sb != sb_in);
5389 
5390    i = 0;
5391    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
5392 
5393       st = sb_in->stmts[i];
5394       tl_assert(st);
5395       tl_assert(isFlatIRStmt(st));
5396 
5397       stmt( 'C', &mce, sb_in->stmts[i] );
5398       i++;
5399    }
5400 
5401    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
5402       cause the IR following the preamble to contain references to IR
5403       temporaries defined in the preamble.  Because the preamble isn't
5404       instrumented, these temporaries don't have any shadows.
5405       Nevertheless uses of them following the preamble will cause
5406       memcheck to generate references to their shadows.  End effect is
5407       to cause IR sanity check failures, due to references to
5408       non-existent shadows.  This is only evident for the complex
5409       preambles used for function wrapping on TOC-afflicted platforms
5410       (ppc64-linux).
5411 
5412       The following loop therefore scans the preamble looking for
5413       assignments to temporaries.  For each one found it creates an
5414       assignment to the corresponding (V) shadow temp, marking it as
5415       'defined'.  This is the same resulting IR as if the main
5416       instrumentation loop before had been applied to the statement
5417       'tmp = CONSTANT'.
5418 
5419       Similarly, if origin tracking is enabled, we must generate an
5420       assignment for the corresponding origin (B) shadow, claiming
5421       no-origin, as appropriate for a defined value.
5422    */
5423    for (j = 0; j < i; j++) {
5424       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
5425          /* findShadowTmpV checks its arg is an original tmp;
5426             no need to assert that here. */
5427          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
5428          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
5429          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
5430          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5431          if (MC_(clo_mc_level) == 3) {
5432             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
5433             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
5434             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5435          }
5436          if (0) {
5437             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5438             ppIRType( ty_v );
5439             VG_(printf)("\n");
5440          }
5441       }
5442    }
5443 
5444    /* Iterate over the remaining stmts to generate instrumentation. */
5445 
5446    tl_assert(sb_in->stmts_used > 0);
5447    tl_assert(i >= 0);
5448    tl_assert(i < sb_in->stmts_used);
5449    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5450 
5451    for (/* use current i*/; i < sb_in->stmts_used; i++) {
5452 
5453       st = sb_in->stmts[i];
5454       first_stmt = sb_out->stmts_used;
5455 
5456       if (verboze) {
5457          VG_(printf)("\n");
5458          ppIRStmt(st);
5459          VG_(printf)("\n");
5460       }
5461 
5462       if (MC_(clo_mc_level) == 3) {
5463          /* See comments on case Ist_CAS below. */
5464          if (st->tag != Ist_CAS)
5465             schemeS( &mce, st );
5466       }
5467 
5468       /* Generate instrumentation code for each stmt ... */
5469 
5470       switch (st->tag) {
5471 
5472          case Ist_WrTmp:
5473             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5474                                expr2vbits( &mce, st->Ist.WrTmp.data) );
5475             break;
5476 
5477          case Ist_Put:
5478             do_shadow_PUT( &mce,
5479                            st->Ist.Put.offset,
5480                            st->Ist.Put.data,
5481                            NULL /* shadow atom */, NULL /* guard */ );
5482             break;
5483 
5484          case Ist_PutI:
5485             do_shadow_PUTI( &mce, st->Ist.PutI.details);
5486             break;
5487 
5488          case Ist_Store:
5489             do_shadow_Store( &mce, st->Ist.Store.end,
5490                                    st->Ist.Store.addr, 0/* addr bias */,
5491                                    st->Ist.Store.data,
5492                                    NULL /* shadow data */,
5493                                    NULL/*guard*/ );
5494             break;
5495 
5496          case Ist_Exit:
5497             complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
5498             break;
5499 
5500          case Ist_IMark:
5501             break;
5502 
5503          case Ist_NoOp:
5504          case Ist_MBE:
5505             break;
5506 
5507          case Ist_Dirty:
5508             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5509             break;
5510 
5511          case Ist_AbiHint:
5512             do_AbiHint( &mce, st->Ist.AbiHint.base,
5513                               st->Ist.AbiHint.len,
5514                               st->Ist.AbiHint.nia );
5515             break;
5516 
5517          case Ist_CAS:
5518             do_shadow_CAS( &mce, st->Ist.CAS.details );
5519             /* Note, do_shadow_CAS copies the CAS itself to the output
5520                block, because it needs to add instrumentation both
5521                before and after it.  Hence skip the copy below.  Also
5522                skip the origin-tracking stuff (call to schemeS) above,
5523                since that's all tangled up with it too; do_shadow_CAS
5524                does it all. */
5525             break;
5526 
5527          case Ist_LLSC:
5528             do_shadow_LLSC( &mce,
5529                             st->Ist.LLSC.end,
5530                             st->Ist.LLSC.result,
5531                             st->Ist.LLSC.addr,
5532                             st->Ist.LLSC.storedata );
5533             break;
5534 
5535          default:
5536             VG_(printf)("\n");
5537             ppIRStmt(st);
5538             VG_(printf)("\n");
5539             VG_(tool_panic)("memcheck: unhandled IRStmt");
5540 
5541       } /* switch (st->tag) */
5542 
5543       if (0 && verboze) {
5544          for (j = first_stmt; j < sb_out->stmts_used; j++) {
5545             VG_(printf)("   ");
5546             ppIRStmt(sb_out->stmts[j]);
5547             VG_(printf)("\n");
5548          }
5549          VG_(printf)("\n");
5550       }
5551 
5552       /* ... and finally copy the stmt itself to the output.  Except,
5553          skip the copy of IRCASs; see comments on case Ist_CAS
5554          above. */
5555       if (st->tag != Ist_CAS)
5556          stmt('C', &mce, st);
5557    }
5558 
5559    /* Now we need to complain if the jump target is undefined. */
5560    first_stmt = sb_out->stmts_used;
5561 
5562    if (verboze) {
5563       VG_(printf)("sb_in->next = ");
5564       ppIRExpr(sb_in->next);
5565       VG_(printf)("\n\n");
5566    }
5567 
5568    complainIfUndefined( &mce, sb_in->next, NULL );
5569 
5570    if (0 && verboze) {
5571       for (j = first_stmt; j < sb_out->stmts_used; j++) {
5572          VG_(printf)("   ");
5573          ppIRStmt(sb_out->stmts[j]);
5574          VG_(printf)("\n");
5575       }
5576       VG_(printf)("\n");
5577    }
5578 
5579    /* If this fails, there's been some serious snafu with tmp management,
5580       that should be investigated. */
5581    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5582    VG_(deleteXA)( mce.tmpMap );
5583 
5584    tl_assert(mce.sb == sb_out);
5585    return sb_out;
5586 }
5587 
5588 /*------------------------------------------------------------*/
5589 /*--- Post-tree-build final tidying                        ---*/
5590 /*------------------------------------------------------------*/
5591 
5592 /* This exploits the observation that Memcheck often produces
5593    repeated conditional calls of the form
5594 
5595    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5596 
5597    with the same guard expression G guarding the same helper call.
5598    The second and subsequent calls are redundant.  This usually
5599    results from instrumentation of guest code containing multiple
5600    memory references at different constant offsets from the same base
5601    register.  After optimisation of the instrumentation, you get a
5602    test for the definedness of the base register for each memory
5603    reference, which is kinda pointless.  MC_(final_tidy) therefore
5604    looks for such repeated calls and removes all but the first. */
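/* As a rough illustration (schematic IR only, not actual Memcheck
   output):

      t9 = <definedness test on some base register>
      if (t9) DIRTY MC_(helperc_value_check4_fail_no_o)()   kept
      ...
      if (t9) DIRTY MC_(helperc_value_check4_fail_no_o)()   same callee and
                                                            same guard, so
                                                            turned into
                                                            IRStmt_NoOp

   The replacement with IRStmt_NoOp is done in MC_(final_tidy) below. */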
5605 
5606 /* A struct for recording which (helper, guard) pairs we have already
5607    seen. */
5608 typedef
5609    struct { void* entry; IRExpr* guard; }
5610    Pair;
5611 
5612 /* Return True if e1 and e2 definitely denote the same value (used to
5613    compare guards).  Return False if unknown; False is the safe
5614    answer.  Since guest registers and guest memory do not have the
5615    SSA property we must return False if any Gets or Loads appear in
5616    the expression. */
5617 
5618 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5619 {
5620    if (e1->tag != e2->tag)
5621       return False;
5622    switch (e1->tag) {
5623       case Iex_Const:
5624          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5625       case Iex_Binop:
5626          return e1->Iex.Binop.op == e2->Iex.Binop.op
5627                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5628                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5629       case Iex_Unop:
5630          return e1->Iex.Unop.op == e2->Iex.Unop.op
5631                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5632       case Iex_RdTmp:
5633          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5634       case Iex_Mux0X:
5635          return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5636                 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5637                 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5638       case Iex_Qop:
5639       case Iex_Triop:
5640       case Iex_CCall:
5641          /* be lazy.  Could define equality for these, but they never
5642             appear to be used. */
5643          return False;
5644       case Iex_Get:
5645       case Iex_GetI:
5646       case Iex_Load:
5647          /* be conservative - these may not give the same value each
5648             time */
5649          return False;
5650       case Iex_Binder:
5651          /* should never see this */
5652          /* fallthrough */
5653       default:
5654          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5655          ppIRExpr(e1);
5656          VG_(tool_panic)("memcheck:sameIRValue");
5657          return False;
5658    }
5659 }
5660 
5661 /* See if 'pairs' already has an entry for (entry, guard).  Return
5662    True if so.  If not, add an entry. */
5663 
5664 static
5665 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5666 {
5667    Pair  p;
5668    Pair* pp;
5669    Int   i, n = VG_(sizeXA)( pairs );
5670    for (i = 0; i < n; i++) {
5671       pp = VG_(indexXA)( pairs, i );
5672       if (pp->entry == entry && sameIRValue(pp->guard, guard))
5673          return True;
5674    }
5675    p.guard = guard;
5676    p.entry = entry;
5677    VG_(addToXA)( pairs, &p );
5678    return False;
5679 }
5680 
5681 static Bool is_helperc_value_checkN_fail ( HChar* name )
5682 {
5683    return
5684       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5685       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5686       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5687       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5688       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5689       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5690       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5691       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5692 }
5693 
5694 IRSB* MC_(final_tidy) ( IRSB* sb_in )
5695 {
5696    Int i;
5697    IRStmt*   st;
5698    IRDirty*  di;
5699    IRExpr*   guard;
5700    IRCallee* cee;
5701    Bool      alreadyPresent;
5702    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5703                                  VG_(free), sizeof(Pair) );
5704    /* Scan forwards through the statements.  Each time a call to one
5705       of the relevant helpers is seen, check if we have made a
5706       previous call to the same helper using the same guard
5707       expression, and if so, delete the call. */
5708    for (i = 0; i < sb_in->stmts_used; i++) {
5709       st = sb_in->stmts[i];
5710       tl_assert(st);
5711       if (st->tag != Ist_Dirty)
5712          continue;
5713       di = st->Ist.Dirty.details;
5714       guard = di->guard;
5715       if (!guard)
5716          continue;
5717       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5718       cee = di->cee;
5719       if (!is_helperc_value_checkN_fail( cee->name ))
5720          continue;
5721        /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5722           guard 'guard'.  Check if we have already seen a call to this
5723           function with the same guard.  If so, delete it.  If not,
5724           add it to the set of calls we do know about. */
5725       alreadyPresent = check_or_add( pairs, guard, cee->addr );
5726       if (alreadyPresent) {
5727          sb_in->stmts[i] = IRStmt_NoOp();
5728          if (0) VG_(printf)("XX\n");
5729       }
5730    }
5731    VG_(deleteXA)( pairs );
5732    return sb_in;
5733 }
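/* Added illustrative note (not part of the original source): a sketch of
   what MC_(final_tidy) does to a superblock that checks the same guard
   twice.  Temp names and the exact dirty-call syntax are schematic.

      before:                                        after:
        t9 = CmpNE32(t5,0x0:I32)                       t9 = CmpNE32(t5,0x0:I32)
        DIRTY t9 ::: helperc_value_check4_fail(..)     DIRTY t9 ::: helperc_value_check4_fail(..)
        ...                                            ...
        DIRTY t9 ::: helperc_value_check4_fail(..)     IR-NoOp

   Only a later call to the same helper whose guard is structurally
   identical (as judged by sameIRValue) is replaced by a no-op; calls
   with different guards, or to different helpers, are left alone. */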
5734 
5735 
5736 /*------------------------------------------------------------*/
5737 /*--- Origin tracking stuff                                ---*/
5738 /*------------------------------------------------------------*/
5739 
5740 /* Almost identical to findShadowTmpV. */
5741 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5742 {
5743    TempMapEnt* ent;
5744    /* VG_(indexXA) range-checks 'orig', hence no need to check
5745       here. */
5746    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5747    tl_assert(ent->kind == Orig);
5748    if (ent->shadowB == IRTemp_INVALID) {
5749       IRTemp tmpB
5750         = newTemp( mce, Ity_I32, BSh );
5751       /* newTemp may cause mce->tmpMap to resize, hence previous results
5752          from VG_(indexXA) are invalid. */
5753       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5754       tl_assert(ent->kind == Orig);
5755       tl_assert(ent->shadowB == IRTemp_INVALID);
5756       ent->shadowB = tmpB;
5757    }
5758    return ent->shadowB;
5759 }
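/* Added note (hedged): the re-lookup of 'ent' above is load-bearing.
   VG_(indexXA) hands back a pointer into the XArray's own storage, and
   newTemp grows mce->tmpMap, which may reallocate that storage.  A
   shortened version such as

      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      ent->shadowB = newTemp( mce, Ity_I32, BSh );   // 'ent' may now be stale

   could therefore write through a dangling pointer.  The code above
   avoids this by calling newTemp first and then re-indexing. */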
5760 
5761 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5762 {
5763    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5764 }
5765 
5766 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5767                             IRAtom* baseaddr, Int offset )
5768 {
5769    void*    hFun;
5770    HChar*   hName;
5771    IRTemp   bTmp;
5772    IRDirty* di;
5773    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5774    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5775    IRAtom*  ea    = baseaddr;
5776    if (offset != 0) {
5777       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5778                                    : mkU64( (Long)(Int)offset );
5779       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5780    }
5781    bTmp = newTemp(mce, mce->hWordTy, BSh);
5782 
5783    switch (szB) {
5784       case 1: hFun  = (void*)&MC_(helperc_b_load1);
5785               hName = "MC_(helperc_b_load1)";
5786               break;
5787       case 2: hFun  = (void*)&MC_(helperc_b_load2);
5788               hName = "MC_(helperc_b_load2)";
5789               break;
5790       case 4: hFun  = (void*)&MC_(helperc_b_load4);
5791               hName = "MC_(helperc_b_load4)";
5792               break;
5793       case 8: hFun  = (void*)&MC_(helperc_b_load8);
5794               hName = "MC_(helperc_b_load8)";
5795               break;
5796       case 16: hFun  = (void*)&MC_(helperc_b_load16);
5797                hName = "MC_(helperc_b_load16)";
5798                break;
5799       case 32: hFun  = (void*)&MC_(helperc_b_load32);
5800                hName = "MC_(helperc_b_load32)";
5801                break;
5802       default:
5803          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5804          tl_assert(0);
5805    }
5806    di = unsafeIRDirty_1_N(
5807            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5808            mkIRExprVec_1( ea )
5809         );
5810    /* no need to mess with any annotations.  This call accesses
5811       neither guest state nor guest memory. */
5812    stmt( 'B', mce, IRStmt_Dirty(di) );
5813    if (mce->hWordTy == Ity_I64) {
5814       /* 64-bit host */
5815       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5816       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5817       return mkexpr(bTmp32);
5818    } else {
5819       /* 32-bit host */
5820       return mkexpr(bTmp);
5821    }
5822 }
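/* Added sketch (not original code): roughly the IR gen_load_b emits for
   szB == 4, offset == 8 on a 64-bit host such as amd64; the temp names
   are invented for illustration.

      t_ea  = Add64(baseaddr, 0x8:I64)
      t_b   = DIRTY 1:I1 ::: MC_(helperc_b_load4)(t_ea)      (t_b :: I64)
      t_b32 = 64to32(t_b)

   t_b32 is the 32-bit origin tag ("B value") handed back to the caller;
   on a 32-bit host the final narrowing step is not needed. */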
5823 
5824 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5825                                     Int offset, IRAtom* guard )
5826 {
5827   if (guard) {
5828      IRAtom *cond, *iffalse, *iftrue;
5829 
5830      cond    = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5831      iftrue  = assignNew('B', mce, Ity_I32,
5832                          gen_load_b(mce, szB, baseaddr, offset));
5833      iffalse = mkU32(0);
5834 
5835      return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5836   }
5837 
5838   return gen_load_b(mce, szB, baseaddr, offset);
5839 }
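/* Added sketch: with a guard g :: Ity_I1 the function above produces,
   approximately,

      t_c   = 1Uto8(g)
      t_ld  = <result of gen_load_b, as above>
      t_res = Mux0X(t_c, 0x0:I32, t_ld)

   Note the B-load helper still runs even when g is false; the Mux0X
   merely selects origin 0 ("nothing known") in that case. */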
5840 
5841 /* Generate a shadow store.  guard :: Ity_I1 controls whether the
5842    store really happens; NULL means it unconditionally does. */
5843 static void gen_store_b ( MCEnv* mce, Int szB,
5844                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
5845                           IRAtom* guard )
5846 {
5847    void*    hFun;
5848    HChar*   hName;
5849    IRDirty* di;
5850    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5851    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5852    IRAtom*  ea    = baseaddr;
5853    if (guard) {
5854       tl_assert(isOriginalAtom(mce, guard));
5855       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5856    }
5857    if (offset != 0) {
5858       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5859                                    : mkU64( (Long)(Int)offset );
5860       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5861    }
5862    if (mce->hWordTy == Ity_I64)
5863       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5864 
5865    switch (szB) {
5866       case 1: hFun  = (void*)&MC_(helperc_b_store1);
5867               hName = "MC_(helperc_b_store1)";
5868               break;
5869       case 2: hFun  = (void*)&MC_(helperc_b_store2);
5870               hName = "MC_(helperc_b_store2)";
5871               break;
5872       case 4: hFun  = (void*)&MC_(helperc_b_store4);
5873               hName = "MC_(helperc_b_store4)";
5874               break;
5875       case 8: hFun  = (void*)&MC_(helperc_b_store8);
5876               hName = "MC_(helperc_b_store8)";
5877               break;
5878       case 16: hFun  = (void*)&MC_(helperc_b_store16);
5879                hName = "MC_(helperc_b_store16)";
5880                break;
5881       case 32: hFun  = (void*)&MC_(helperc_b_store32);
5882                hName = "MC_(helperc_b_store32)";
5883                break;
5884       default:
5885          tl_assert(0);
5886    }
5887    di = unsafeIRDirty_0_N( 2/*regparms*/,
5888            hName, VG_(fnptr_to_fnentry)( hFun ),
5889            mkIRExprVec_2( ea, dataB )
5890         );
5891    /* no need to mess with any annotations.  This call accesses
5892       neither guest state nor guest memory. */
5893    if (guard) di->guard = guard;
5894    stmt( 'B', mce, IRStmt_Dirty(di) );
5895 }
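/* Added sketch: for szB == 4, offset == 0 and no guard, on a 64-bit
   host, gen_store_b emits roughly

      t_b64 = 32Uto64(dataB)
      DIRTY 1:I1 ::: MC_(helperc_b_store4)(baseaddr, t_b64)

   When a guard is supplied it simply becomes the dirty call's guard, so
   (unlike the guarded load above) the store helper is skipped entirely
   when the guard evaluates to false. */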
5896 
5897 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5898    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5899    if (eTy == Ity_I64)
5900       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5901    if (eTy == Ity_I32)
5902       return e;
5903    tl_assert(0);
5904 }
5905 
5906 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5907    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5908    tl_assert(eTy == Ity_I32);
5909    if (dstTy == Ity_I64)
5910       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5911    tl_assert(0);
5912 }
5913 
5914 
5915 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5916 {
5917    tl_assert(MC_(clo_mc_level) == 3);
5918 
5919    switch (e->tag) {
5920 
5921       case Iex_GetI: {
5922          IRRegArray* descr_b;
5923          IRAtom      *t1, *t2, *t3, *t4;
5924          IRRegArray* descr      = e->Iex.GetI.descr;
5925          IRType equivIntTy
5926             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5927          /* If this array is unshadowable for whatever reason, use the
5928             usual approximation. */
5929          if (equivIntTy == Ity_INVALID)
5930             return mkU32(0);
5931          tl_assert(sizeofIRType(equivIntTy) >= 4);
5932          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5933          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5934                                  equivIntTy, descr->nElems );
5935          /* Do a shadow indexed get of the same size, giving t1.  Take
5936             the bottom 32 bits of it, giving t2.  Compute into t3 the
5937             origin for the index (almost certainly zero, but there's
5938             no harm in being completely general here, since iropt will
5939             remove any useless code), and fold it in, giving a final
5940             value t4. */
5941          t1 = assignNew( 'B', mce, equivIntTy,
5942                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5943                                                 e->Iex.GetI.bias ));
5944          t2 = narrowTo32( mce, t1 );
5945          t3 = schemeE( mce, e->Iex.GetI.ix );
5946          t4 = gen_maxU32( mce, t2, t3 );
5947          return t4;
5948       }
5949       case Iex_CCall: {
5950          Int i;
5951          IRAtom*  here;
5952          IRExpr** args = e->Iex.CCall.args;
5953          IRAtom*  curr = mkU32(0);
5954          for (i = 0; args[i]; i++) {
5955             tl_assert(i < 32);
5956             tl_assert(isOriginalAtom(mce, args[i]));
5957             /* Only take notice of this arg if the callee's
5958                mc-exclusion mask does not say it is to be excluded. */
5959             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5960                /* the arg is to be excluded from definedness checking.
5961                   Do nothing. */
5962                if (0) VG_(printf)("excluding %s(%d)\n",
5963                                   e->Iex.CCall.cee->name, i);
5964             } else {
5965                /* calculate the arg's definedness, and pessimistically
5966                   merge it in. */
5967                here = schemeE( mce, args[i] );
5968                curr = gen_maxU32( mce, curr, here );
5969             }
5970          }
5971          return curr;
5972       }
5973       case Iex_Load: {
5974          Int dszB;
5975          dszB = sizeofIRType(e->Iex.Load.ty);
5976          /* assert that the B value for the address is already
5977             available (somewhere) */
5978          tl_assert(isIRAtom(e->Iex.Load.addr));
5979          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5980          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5981       }
5982       case Iex_Mux0X: {
5983          IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5984          IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5985          IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5986          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5987       }
5988       case Iex_Qop: {
5989          IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
5990          IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
5991          IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
5992          IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
5993          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5994                                  gen_maxU32( mce, b3, b4 ) );
5995       }
5996       case Iex_Triop: {
5997          IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
5998          IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
5999          IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
6000          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6001       }
6002       case Iex_Binop: {
6003          switch (e->Iex.Binop.op) {
6004             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
6005             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6006             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6007             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6008                /* Just say these all produce a defined result,
6009                   regardless of their arguments.  See
6010                   COMMENT_ON_CasCmpEQ in this file. */
6011                return mkU32(0);
6012             default: {
6013                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6014                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6015                return gen_maxU32( mce, b1, b2 );
6016             }
6017          }
6018          tl_assert(0);
6019          /*NOTREACHED*/
6020       }
6021       case Iex_Unop: {
6022          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6023          return b1;
6024       }
6025       case Iex_Const:
6026          return mkU32(0);
6027       case Iex_RdTmp:
6028          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6029       case Iex_Get: {
6030          Int b_offset = MC_(get_otrack_shadow_offset)(
6031                            e->Iex.Get.offset,
6032                            sizeofIRType(e->Iex.Get.ty)
6033                         );
6034          tl_assert(b_offset >= -1
6035                    && b_offset <= mce->layout->total_sizeB -4);
6036          if (b_offset >= 0) {
6037             /* FIXME: this isn't an atom! */
6038             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6039                                Ity_I32 );
6040          }
6041          return mkU32(0);
6042       }
6043       default:
6044          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6045          ppIRExpr(e);
6046          VG_(tool_panic)("memcheck:schemeE");
6047    }
6048 }
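/* Added worked example (schematic, not original code): for an original
   expression  Add32(t7, Get(<some tracked guest register>, I32))  with
   MC_(clo_mc_level) == 3, schemeE produces roughly

      b_get = Get(<B-shadow offset of that register>, I32)
      b_t7  = mkexpr(<shadow-B temp of t7>)
      b_res = Max32U(b_t7, b_get)

   i.e. the origin tag of the result is the maxU32 of the origins of the
   inputs - a cheap, deterministic way of picking one nonzero otag when
   several are available - and constants contribute origin 0. */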
6049 
6050 
6051 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6052 {
6053    // This is a hacked version of do_shadow_Dirty
6054    Int       i, k, n, toDo, gSz, gOff;
6055    IRAtom    *here, *curr;
6056    IRTemp    dst;
6057 
6058    /* First check the guard. */
6059    curr = schemeE( mce, d->guard );
6060 
6061    /* Now round up all inputs and maxU32 over them. */
6062 
6063    /* Inputs: unmasked args
6064       Note: arguments are evaluated REGARDLESS of the guard expression */
6065    for (i = 0; d->args[i]; i++) {
6066       if (d->cee->mcx_mask & (1<<i)) {
6067          /* ignore this arg */
6068       } else {
6069          here = schemeE( mce, d->args[i] );
6070          curr = gen_maxU32( mce, curr, here );
6071       }
6072    }
6073 
6074    /* Inputs: guest state that we read. */
6075    for (i = 0; i < d->nFxState; i++) {
6076       tl_assert(d->fxState[i].fx != Ifx_None);
6077       if (d->fxState[i].fx == Ifx_Write)
6078          continue;
6079 
6080       /* Enumerate the described state segments */
6081       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6082          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6083          gSz  = d->fxState[i].size;
6084 
6085          /* Ignore any sections marked as 'always defined'. */
6086          if (isAlwaysDefd(mce, gOff, gSz)) {
6087             if (0)
6088             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6089                         gOff, gSz);
6090             continue;
6091          }
6092 
6093          /* This state element is read or modified.  So we need to
6094             consider it.  If larger than 4 bytes, deal with it in
6095             4-byte chunks. */
6096          while (True) {
6097             Int b_offset;
6098             tl_assert(gSz >= 0);
6099             if (gSz == 0) break;
6100             n = gSz <= 4 ? gSz : 4;
6101             /* update 'curr' with maxU32 of the state slice
6102                gOff .. gOff+n-1 */
6103             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6104             if (b_offset != -1) {
6105                /* Observe the guard expression. If it is false use 0, i.e.
6106                   nothing is known about the origin */
6107                IRAtom *cond, *iffalse, *iftrue;
6108 
6109                cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
6110                iffalse = mkU32(0);
6111                iftrue  = assignNew( 'B', mce, Ity_I32,
6112                                     IRExpr_Get(b_offset
6113                                                  + 2*mce->layout->total_sizeB,
6114                                                Ity_I32));
6115                here = assignNew( 'B', mce, Ity_I32,
6116                                  IRExpr_Mux0X(cond, iffalse, iftrue));
6117                curr = gen_maxU32( mce, curr, here );
6118             }
6119             gSz -= n;
6120             gOff += n;
6121          }
6122       }
6123    }
6124 
6125    /* Inputs: memory */
6126 
6127    if (d->mFx != Ifx_None) {
6128       /* Because we may do multiple shadow loads/stores from the same
6129          base address, it's best to do a single test of its
6130          definedness right now.  Post-instrumentation optimisation
6131          should remove all but this test. */
6132       tl_assert(d->mAddr);
6133       here = schemeE( mce, d->mAddr );
6134       curr = gen_maxU32( mce, curr, here );
6135    }
6136 
6137    /* Deal with memory inputs (reads or modifies) */
6138    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6139       toDo   = d->mSize;
6140       /* chew off 32-bit chunks.  We don't care about the endianness
6141          since every chunk is just folded (via maxU32) into a single
6142          32-bit origin value, but nevertheless choose an endianness
6143          which is hopefully native to the platform. */
6144       while (toDo >= 4) {
6145          here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6146                                     d->guard );
6147          curr = gen_maxU32( mce, curr, here );
6148          toDo -= 4;
6149       }
6150       /* handle possible 16-bit excess */
6151       while (toDo >= 2) {
6152          here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6153                                     d->guard );
6154          curr = gen_maxU32( mce, curr, here );
6155          toDo -= 2;
6156       }
6157       /* chew off the remaining 8-bit chunk, if any */
6158       if (toDo == 1) {
6159          here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6160                                     d->guard );
6161          curr = gen_maxU32( mce, curr, here );
6162          toDo -= 1;
6163       }
6164       tl_assert(toDo == 0);
6165    }
6166 
6167    /* Whew!  So curr is a 32-bit B-value which should give an origin
6168       of some use if any of the inputs to the helper are undefined.
6169       Now we need to re-distribute the results to all destinations. */
6170 
6171    /* Outputs: the destination temporary, if there is one. */
6172    if (d->tmp != IRTemp_INVALID) {
6173       dst   = findShadowTmpB(mce, d->tmp);
6174       assign( 'V', mce, dst, curr );
6175    }
6176 
6177    /* Outputs: guest state that we write or modify. */
6178    for (i = 0; i < d->nFxState; i++) {
6179       tl_assert(d->fxState[i].fx != Ifx_None);
6180       if (d->fxState[i].fx == Ifx_Read)
6181          continue;
6182 
6183       /* Enumerate the described state segments */
6184       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6185          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6186          gSz  = d->fxState[i].size;
6187 
6188          /* Ignore any sections marked as 'always defined'. */
6189          if (isAlwaysDefd(mce, gOff, gSz))
6190             continue;
6191 
6192          /* This state element is written or modified.  So we need to
6193             consider it.  If larger than 4 bytes, deal with it in
6194             4-byte chunks. */
6195          while (True) {
6196             Int b_offset;
6197             tl_assert(gSz >= 0);
6198             if (gSz == 0) break;
6199             n = gSz <= 4 ? gSz : 4;
6200             /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6201             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6202             if (b_offset != -1) {
6203                if (d->guard) {
6204                   /* If the guard expression evaluates to false we simply Put
6205                      the value that is already stored in the guest state slot */
6206                   IRAtom *cond, *iffalse;
6207 
6208                   cond    = assignNew('B', mce, Ity_I8,
6209                                       unop(Iop_1Uto8, d->guard));
6210                   iffalse = assignNew('B', mce, Ity_I32,
6211                                       IRExpr_Get(b_offset +
6212                                                  2*mce->layout->total_sizeB,
6213                                                  Ity_I32));
6214                   curr = assignNew('V', mce, Ity_I32,
6215                                    IRExpr_Mux0X(cond, iffalse, curr));
6216                }
6217                stmt( 'B', mce, IRStmt_Put(b_offset
6218                                              + 2*mce->layout->total_sizeB,
6219                                           curr ));
6220             }
6221             gSz -= n;
6222             gOff += n;
6223          }
6224       }
6225    }
6226 
6227    /* Outputs: memory that we write or modify.  Same comments about
6228       endianness as above apply. */
6229    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
6230       toDo   = d->mSize;
6231       /* chew off 32-bit chunks */
6232       while (toDo >= 4) {
6233          gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
6234                       d->guard );
6235          toDo -= 4;
6236       }
6237       /* handle possible 16-bit excess */
6238       while (toDo >= 2) {
6239          gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6240                       d->guard );
6241          toDo -= 2;
6242       }
6243       /* chew off the remaining 8-bit chunk, if any */
6244       if (toDo == 1) {
6245          gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
6246                       d->guard );
6247          toDo -= 1;
6248       }
6249       tl_assert(toDo == 0);
6250    }
6251 }
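/* Added note: the 4/2/1-byte chunking above means a dirty call that
   reads, say, 7 bytes at mAddr with guard G is summarised by three
   guarded B loads, roughly

      gen_guarded_load_b(mce, 4, mAddr, 0, G)
      gen_guarded_load_b(mce, 2, mAddr, 4, G)
      gen_guarded_load_b(mce, 1, mAddr, 6, G)

   whose results are folded into 'curr' with gen_maxU32.  The write-back
   side uses the same offsets, but with gen_store_b and the value 'curr'. */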
6252 
6253 
6254 static void do_origins_Store ( MCEnv* mce,
6255                                IREndness stEnd,
6256                                IRExpr* stAddr,
6257                                IRExpr* stData )
6258 {
6259    Int     dszB;
6260    IRAtom* dataB;
6261    /* assert that the B value for the address is already available
6262       (somewhere), since the call to schemeE will want to see it.
6263       XXXX how does this actually ensure that?? */
6264    tl_assert(isIRAtom(stAddr));
6265    tl_assert(isIRAtom(stData));
6266    dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6267    dataB = schemeE( mce, stData );
6268    gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6269                      NULL/*guard*/ );
6270 }
6271 
6272 
6273 static void schemeS ( MCEnv* mce, IRStmt* st )
6274 {
6275    tl_assert(MC_(clo_mc_level) == 3);
6276 
6277    switch (st->tag) {
6278 
6279       case Ist_AbiHint:
6280          /* The value-check instrumenter handles this - by arranging
6281             to pass the address of the next instruction to
6282             MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
6283             happen for origin tracking w.r.t. AbiHints.  So there is
6284             nothing to do here. */
6285          break;
6286 
6287       case Ist_PutI: {
6288          IRPutI *puti = st->Ist.PutI.details;
6289          IRRegArray* descr_b;
6290          IRAtom      *t1, *t2, *t3, *t4;
6291          IRRegArray* descr = puti->descr;
6292          IRType equivIntTy
6293             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6294          /* If this array is unshadowable for whatever reason,
6295             generate no code. */
6296          if (equivIntTy == Ity_INVALID)
6297             break;
6298          tl_assert(sizeofIRType(equivIntTy) >= 4);
6299          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6300          descr_b
6301             = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6302                             equivIntTy, descr->nElems );
6303          /* Compute a value to Put - the conjoinment of the origin for
6304             the data to be Put-ted (obviously) and of the index value
6305             (not so obviously). */
6306          t1 = schemeE( mce, puti->data );
6307          t2 = schemeE( mce, puti->ix );
6308          t3 = gen_maxU32( mce, t1, t2 );
6309          t4 = zWidenFrom32( mce, equivIntTy, t3 );
6310          stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
6311                                                puti->bias, t4) ));
6312          break;
6313       }
6314 
6315       case Ist_Dirty:
6316          do_origins_Dirty( mce, st->Ist.Dirty.details );
6317          break;
6318 
6319       case Ist_Store:
6320          do_origins_Store( mce, st->Ist.Store.end,
6321                                 st->Ist.Store.addr,
6322                                 st->Ist.Store.data );
6323          break;
6324 
6325       case Ist_LLSC: {
6326          /* In short: treat a load-linked like a normal load followed
6327             by an assignment of the loaded (shadow) data to the result
6328             temporary.  Treat a store-conditional like a normal store,
6329             and mark the result temporary as defined. */
6330          if (st->Ist.LLSC.storedata == NULL) {
6331             /* Load Linked */
6332             IRType resTy
6333                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
6334             IRExpr* vanillaLoad
6335                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
6336             tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6337                       || resTy == Ity_I16 || resTy == Ity_I8);
6338             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6339                               schemeE(mce, vanillaLoad));
6340          } else {
6341             /* Store conditional */
6342             do_origins_Store( mce, st->Ist.LLSC.end,
6343                                    st->Ist.LLSC.addr,
6344                                    st->Ist.LLSC.storedata );
6345             /* For the rationale behind this, see comments at the
6346                place where the V-shadow for .result is constructed, in
6347                do_shadow_LLSC.  In short, we regard .result as
6348                always-defined. */
6349             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6350                               mkU32(0) );
6351          }
6352          break;
6353       }
6354 
6355       case Ist_Put: {
6356          Int b_offset
6357             = MC_(get_otrack_shadow_offset)(
6358                  st->Ist.Put.offset,
6359                  sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
6360               );
6361          if (b_offset >= 0) {
6362             /* FIXME: this isn't an atom! */
6363             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
6364                                        schemeE( mce, st->Ist.Put.data )) );
6365          }
6366          break;
6367       }
6368 
6369       case Ist_WrTmp:
6370          assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
6371                            schemeE(mce, st->Ist.WrTmp.data) );
6372          break;
6373 
6374       case Ist_MBE:
6375       case Ist_NoOp:
6376       case Ist_Exit:
6377       case Ist_IMark:
6378          break;
6379 
6380       default:
6381          VG_(printf)("mc_translate.c: schemeS: unhandled: ");
6382          ppIRStmt(st);
6383          VG_(tool_panic)("memcheck:schemeS");
6384    }
6385 }
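/* Added sketch: how a plain store statement ends up in the B ("origin")
   shadow world.  For an original  STle(t_addr) = t_data  with 32-bit
   data, schemeS dispatches to do_origins_Store, which boils down to

      b_data = schemeE(t_data)                        (a 32-bit otag)
      DIRTY ::: MC_(helperc_b_store4)(t_addr, b_data)

   i.e. the data's origin tag is written, unguarded, to the shadow B
   area for that address; addressability and definedness of t_addr
   itself are handled separately by the V-bit instrumentation. */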
6386 
6387 
6388 /*--------------------------------------------------------------------*/
6389 /*--- end                                           mc_translate.c ---*/
6390 /*--------------------------------------------------------------------*/
6391