/* (header follows) */
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
10
11 Copyright (C) 2000-2017 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h" // For mc_include.h
34 #include "pub_tool_hashtable.h" // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
42
43 #include "mc_include.h"
44
45
46 /* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51
52 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53 saturating shifts): the interpretation is overly pessimistic.
54 See comments on the relevant cases below for details.
55
56 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57 both rounding and non-rounding variants): ditto
58 */
59
60 /* This file implements the Memcheck instrumentation, and in
61 particular contains the core of its undefined value detection
62 machinery. For a comprehensive background of the terminology,
63 algorithms and rationale used herein, read:
64
65 Using Valgrind to detect undefined value errors with
66 bit-precision
67
68 Julian Seward and Nicholas Nethercote
69
70 2005 USENIX Annual Technical Conference (General Track),
71 Anaheim, CA, USA, April 10-15, 2005.
72
73 ----
74
75 Here is as good a place as any to record exactly when V bits are and
76 should be checked, why, and what function is responsible.
77
78
79 Memcheck complains when an undefined value is used:
80
81 1. In the condition of a conditional branch. Because it could cause
82 incorrect control flow, and thus cause incorrect externally-visible
83 behaviour. [mc_translate.c:complainIfUndefined]
84
85 2. As an argument to a system call, or as the value that specifies
86 the system call number. Because it could cause an incorrect
87 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
88
89 3. As the address in a load or store. Because it could cause an
90 incorrect value to be used later, which could cause externally-visible
91 behaviour (eg. via incorrect control flow or an incorrect system call
92 argument) [complainIfUndefined]
93
94 4. As the target address of a branch. Because it could cause incorrect
95 control flow. [complainIfUndefined]
96
97 5. As an argument to setenv, unsetenv, or putenv. Because it could put
98 an incorrect value into the external environment.
99 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
100
101 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
102 [complainIfUndefined]
103
104 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
106 requested it. [in memcheck.h]
107
108
109 Memcheck also complains, but should not, when an undefined value is used:
110
111 8. As the shift value in certain SIMD shift operations (but not in the
112 standard integer shift operations). This inconsistency is due to
113 historical reasons.) [complainIfUndefined]
114
115
116 Memcheck does not complain, but should, when an undefined value is used:
117
118 9. As an input to a client request. Because the client request may
119 affect the visible behaviour -- see bug #144362 for an example
120 involving the malloc replacements in vg_replace_malloc.c and
121 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122 isn't identified. That bug report also has some info on how to solve
123 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124
125
126 In practice, 1 and 2 account for the vast majority of cases.
127 */
128
129 /* Generation of addr-definedness, addr-validity and
130 guard-definedness checks pertaining to loads and stores (Iex_Load,
131 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132 loads/stores) was re-checked 11 May 2013. */
133
134 /*------------------------------------------------------------*/
135 /*--- Forward decls ---*/
136 /*------------------------------------------------------------*/
137
138 struct _MCEnv;
139
140 static IRType shadowTypeV ( IRType ty );
141 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
142 static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
143
144 static IRExpr *i128_const_zero(void);
145
146 /*------------------------------------------------------------*/
147 /*--- Memcheck running state, and tmp management. ---*/
148 /*------------------------------------------------------------*/
149
150 /* Carries info about a particular tmp. The tmp's number is not
151 recorded, as this is implied by (equal to) its index in the tmpMap
152 in MCEnv. The tmp's type is also not recorded, as this is present
153 in MCEnv.sb->tyenv.
154
155 When .kind is Orig, .shadowV and .shadowB may give the identities
156 of the temps currently holding the associated definedness (shadowV)
157 and origin (shadowB) values, or these may be IRTemp_INVALID if code
158 to compute such values has not yet been emitted.
159
160 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
161 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
162 illogical for a shadow tmp itself to be shadowed.
163 */
164 typedef
165 enum { Orig=1, VSh=2, BSh=3 }
166 TempKind;
167
168 typedef
169 struct {
170 TempKind kind;
171 IRTemp shadowV;
172 IRTemp shadowB;
173 }
174 TempMapEnt;
175
176
177 /* Carries around state during memcheck instrumentation. */
178 typedef
179 struct _MCEnv {
180 /* MODIFIED: the superblock being constructed. IRStmts are
181 added. */
182 IRSB* sb;
183 Bool trace;
184
185 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
186 current kind and possibly shadow temps for each temp in the
187 IRSB being constructed. Note that it does not contain the
188 type of each tmp. If you want to know the type, look at the
189 relevant entry in sb->tyenv. It follows that at all times
190 during the instrumentation process, the valid indices for
191 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
192 total number of Orig, V- and B- temps allocated so far.
193
194 The reason for this strange split (types in one place, all
195 other info in another) is that we need the types to be
196 attached to sb so as to make it possible to do
197 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
198 instrumentation process. */
199 XArray* /* of TempMapEnt */ tmpMap;
200
201 /* MODIFIED: indicates whether "bogus" literals have so far been
202 found. Starts off False, and may change to True. */
203 Bool bogusLiterals;
204
205 /* READONLY: indicates whether we should use expensive
206 interpretations of integer adds, since unfortunately LLVM
207 uses them to do ORs in some circumstances. Defaulted to True
208 on MacOS and False everywhere else. */
209 Bool useLLVMworkarounds;
210
211 /* READONLY: the guest layout. This indicates which parts of
212 the guest state should be regarded as 'always defined'. */
213 const VexGuestLayout* layout;
214
215 /* READONLY: the host word type. Needed for constructing
216 arguments of type 'HWord' to be passed to helper functions.
217 Ity_I32 or Ity_I64 only. */
218 IRType hWordTy;
219 }
220 MCEnv;
221
222 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
223 demand), as they are encountered. This is for two reasons.
224
225 (1) (less important reason): Many original tmps are unused due to
226 initial IR optimisation, and we do not want to spaces in tables
227 tracking them.
228
229 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
230 table indexed [0 .. n_types-1], which gives the current shadow for
231 each original tmp, or INVALID_IRTEMP if none is so far assigned.
232 It is necessary to support making multiple assignments to a shadow
233 -- specifically, after testing a shadow for definedness, it needs
234 to be made defined. But IR's SSA property disallows this.
235
236 (2) (more important reason): Therefore, when a shadow needs to get
237 a new value, a new temporary is created, the value is assigned to
238 that, and the tmpMap is updated to reflect the new binding.
239
240 A corollary is that if the tmpMap maps a given tmp to
241 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
242 there's a read-before-write error in the original tmps. The IR
243 sanity checker should catch all such anomalies, however.
244 */
245
246 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
247 both the table in mce->sb and to our auxiliary mapping. Note that
248 newTemp may cause mce->tmpMap to resize, hence previous results
249 from VG_(indexXA)(mce->tmpMap) are invalidated. */
newTemp(MCEnv * mce,IRType ty,TempKind kind)250 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
251 {
252 Word newIx;
253 TempMapEnt ent;
254 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
255 ent.kind = kind;
256 ent.shadowV = IRTemp_INVALID;
257 ent.shadowB = IRTemp_INVALID;
258 newIx = VG_(addToXA)( mce->tmpMap, &ent );
259 tl_assert(newIx == (Word)tmp);
260 return tmp;
261 }
262
263
264 /* Find the tmp currently shadowing the given original tmp. If none
265 so far exists, allocate one. */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      /* No V-shadow exists for 'orig' yet -- allocate one lazily,
         with the integer type matching orig's type. */
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid.  Re-fetch the entry. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
285
286 /* Allocate a new shadow for the given original tmp. This means any
287 previous shadow is abandoned. This is needed because it is
288 necessary to give a new value to a shadow once it has been tested
289 for undefinedness, but unfortunately IR's SSA property disallows
290 this. Instead we must abandon the old shadow, allocate a new one
291 and use that instead.
292
293 This is the same as findShadowTmpV, except we don't bother to see
294 if a shadow temp already existed -- we simply allocate a new one
295 regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   /* Unconditionally allocate a fresh V-shadow, abandoning any
      previous one (see the comment above: IR's SSA property forbids
      re-assigning the old shadow). */
   IRTemp tmpV
     = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
   /* newTemp may cause mce->tmpMap to resize, hence previous results
      from VG_(indexXA) are invalid.  Re-fetch the entry. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   ent->shadowV = tmpV;
}
313
314
315 /*------------------------------------------------------------*/
316 /*--- IRAtoms -- a subset of IRExprs ---*/
317 /*------------------------------------------------------------*/
318
319 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
320 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
321 input, most of this code deals in atoms. Usefully, a value atom
322 always has a V-value which is also an atom: constants are shadowed
323 by constants, and temps are shadowed by the corresponding shadow
324 temporary. */
325
326 typedef IRExpr IRAtom;
327
328 /* (used for sanity checks only): is this an atom which looks
329 like it's from original code? */
isOriginalAtom(MCEnv * mce,IRAtom * a1)330 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
331 {
332 if (a1->tag == Iex_Const)
333 return True;
334 if (a1->tag == Iex_RdTmp) {
335 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
336 return ent->kind == Orig;
337 }
338 return False;
339 }
340
341 /* (used for sanity checks only): is this an atom which looks
342 like it's from shadow code? */
isShadowAtom(MCEnv * mce,IRAtom * a1)343 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
344 {
345 if (a1->tag == Iex_Const)
346 return True;
347 if (a1->tag == Iex_RdTmp) {
348 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
349 return ent->kind == VSh || ent->kind == BSh;
350 }
351 return False;
352 }
353
354 /* (used for sanity checks only): check that both args are atoms and
355 are identically-kinded. */
sameKindedAtoms(IRAtom * a1,IRAtom * a2)356 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
357 {
358 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
359 return True;
360 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
361 return True;
362 return False;
363 }
364
365
366 /*------------------------------------------------------------*/
367 /*--- Type management ---*/
368 /*------------------------------------------------------------*/
369
370 /* Shadow state is always accessed using integer types. This returns
371 an integer type with the same size (as per sizeofIRType) as the
372 given type. The only valid shadow types are Bit, I8, I16, I32,
373 I64, I128, V128, V256. */
374
static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      /* Integer and vector types shadow themselves. */
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      /* FP and decimal types are shadowed by the equal-sized
         integer type. */
      case Ity_F16:  return Ity_I16;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}
397
398 /* Produce a 'defined' value of the given shadow type. Should only be
399 supplied shadow types (Bit/I8/I16/I32/UI64). */
definedOfType(IRType ty)400 static IRExpr* definedOfType ( IRType ty ) {
401 switch (ty) {
402 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
403 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
404 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
405 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
406 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
407 case Ity_I128: return i128_const_zero();
408 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
409 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
410 default: VG_(tool_panic)("memcheck:definedOfType");
411 }
412 }
413
414
415 /*------------------------------------------------------------*/
416 /*--- Constructing IR fragments ---*/
417 /*------------------------------------------------------------*/
418
419 /* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   /* 'cat' is a one-char tag ('V', 'B', 'C') printed when tracing,
      identifying which kind of instrumentation emitted the stmt. */
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
428
429 /* assign value to tmp */
430 static inline
assign(HChar cat,MCEnv * mce,IRTemp tmp,IRExpr * expr)431 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
432 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
433 }
434
435 /* build various kinds of expressions */
436 #define triop(_op, _arg1, _arg2, _arg3) \
437 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
438 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
439 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
440 #define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
441 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
442 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
443 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
444 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
445 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
446 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
447
448 /* Bind the given expression to a new temporary, and return the
449 temporary. This effectively converts an arbitrary expression into
450 an atom.
451
452 'ty' is the type of 'e' and hence the type that the new temporary
453 needs to be. But passing it in is redundant, since we can deduce
454 the type merely by inspecting 'e'. So at least use that fact to
455 assert that the two types agree. */
assignNew(HChar cat,MCEnv * mce,IRType ty,IRExpr * e)456 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
457 {
458 TempKind k;
459 IRTemp t;
460 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
461
462 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
463 switch (cat) {
464 case 'V': k = VSh; break;
465 case 'B': k = BSh; break;
466 case 'C': k = Orig; break;
467 /* happens when we are making up new "orig"
468 expressions, for IRCAS handling */
469 default: tl_assert(0);
470 }
471 t = newTemp(mce, ty, k);
472 assign(cat, mce, t, e);
473 return mkexpr(t);
474 }
475
476
477 /*------------------------------------------------------------*/
478 /*--- Helper functions for 128-bit ops ---*/
479 /*------------------------------------------------------------*/
480
i128_const_zero(void)481 static IRExpr *i128_const_zero(void)
482 {
483 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
484 return binop(Iop_64HLto128, z64, z64);
485 }
486
487 /* There are no I128-bit loads and/or stores [as generated by any
488 current front ends]. So we do not need to worry about that in
489 expr2vbits_Load */
490
491
492 /*------------------------------------------------------------*/
493 /*--- Constructing definedness primitive ops ---*/
494 /*------------------------------------------------------------*/
495
496 /* --------- Defined-if-either-defined --------- */
497
/* DifD(x,y): result bit is defined (0) if either input bit is
   defined; hence bitwise AND of the two V-bit vectors, at each
   width. */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}
533
534 /* --------- Undefined-if-either-undefined --------- */
535
mkUifU8(MCEnv * mce,IRAtom * a1,IRAtom * a2)536 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
537 tl_assert(isShadowAtom(mce,a1));
538 tl_assert(isShadowAtom(mce,a2));
539 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
540 }
541
mkUifU16(MCEnv * mce,IRAtom * a1,IRAtom * a2)542 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
543 tl_assert(isShadowAtom(mce,a1));
544 tl_assert(isShadowAtom(mce,a2));
545 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
546 }
547
mkUifU32(MCEnv * mce,IRAtom * a1,IRAtom * a2)548 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
549 tl_assert(isShadowAtom(mce,a1));
550 tl_assert(isShadowAtom(mce,a2));
551 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
552 }
553
mkUifU64(MCEnv * mce,IRAtom * a1,IRAtom * a2)554 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
555 tl_assert(isShadowAtom(mce,a1));
556 tl_assert(isShadowAtom(mce,a2));
557 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
558 }
559
mkUifU128(MCEnv * mce,IRAtom * a1,IRAtom * a2)560 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
561 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
562 tl_assert(isShadowAtom(mce,a1));
563 tl_assert(isShadowAtom(mce,a2));
564 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
565 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
566 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
567 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
568 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
569 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
570
571 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
572 }
573
mkUifUV128(MCEnv * mce,IRAtom * a1,IRAtom * a2)574 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
575 tl_assert(isShadowAtom(mce,a1));
576 tl_assert(isShadowAtom(mce,a2));
577 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
578 }
579
mkUifUV256(MCEnv * mce,IRAtom * a1,IRAtom * a2)580 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
581 tl_assert(isShadowAtom(mce,a1));
582 tl_assert(isShadowAtom(mce,a2));
583 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
584 }
585
mkUifU(MCEnv * mce,IRType vty,IRAtom * a1,IRAtom * a2)586 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
587 switch (vty) {
588 case Ity_I8: return mkUifU8(mce, a1, a2);
589 case Ity_I16: return mkUifU16(mce, a1, a2);
590 case Ity_I32: return mkUifU32(mce, a1, a2);
591 case Ity_I64: return mkUifU64(mce, a1, a2);
592 case Ity_I128: return mkUifU128(mce, a1, a2);
593 case Ity_V128: return mkUifUV128(mce, a1, a2);
594 case Ity_V256: return mkUifUV256(mce, a1, a2);
595 default:
596 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
597 VG_(tool_panic)("memcheck:mkUifU");
598 }
599 }
600
601 /* --------- The Left-family of operations. --------- */
602
/* Left(x): smear the rightmost undefined (1) bit leftwards, via the
   dedicated Iop_LeftN primitives, at each width. */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
622
623 /* --------- 'Improvement' functions for AND/OR. --------- */
624
625 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
626 defined (0); all other -> undefined (1).
627 */
/* ImproveAND(data, vbits) = data OR vbits.  A defined (0) data bit
   whose value is 0 forces the result of an AND to 0 regardless of
   the other operand, so such positions may be reported as defined
   (0); all other positions become undefined (1). */

static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}
675
676 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
677 defined (0); all other -> undefined (1).
678 */
/* ImproveOR(data, vbits) = ~data OR vbits.  A defined (0) data bit
   whose value is 1 forces the result of an OR to 1 regardless of the
   other operand, so such positions may be reported as defined (0);
   all other positions become undefined (1). */

static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}
750
751 /* --------- Pessimising casts. --------- */
752
753 /* The function returns an expression of type DST_TY. If any of the VBITS
754 is undefined (value == 1) the resulting expression has all bits set to
755 1. Otherwise, all bits are 0. */
756
mkPCastTo(MCEnv * mce,IRType dst_ty,IRAtom * vbits)757 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
758 {
759 IRType src_ty;
760 IRAtom* tmp1;
761
762 /* Note, dst_ty is a shadow type, not an original type. */
763 tl_assert(isShadowAtom(mce,vbits));
764 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
765
766 /* Fast-track some common cases */
767 if (src_ty == Ity_I32 && dst_ty == Ity_I32)
768 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
769
770 if (src_ty == Ity_I64 && dst_ty == Ity_I64)
771 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
772
773 if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
774 /* PCast the arg, then clone it. */
775 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
776 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
777 }
778
779 if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
780 /* PCast the arg, then clone it 4 times. */
781 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
782 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
783 return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
784 }
785
786 if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
787 /* PCast the arg, then clone it 8 times. */
788 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
789 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
790 tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
791 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
792 }
793
794 if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
795 /* PCast the arg. This gives all 0s or all 1s. Then throw away
796 the top half. */
797 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
798 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
799 }
800
801 if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
802 /* Use InterleaveHI64x2 to copy the top half of the vector into
803 the bottom half. Then we can UifU it with the original, throw
804 away the upper half of the result, and PCast-I64-to-I64
805 the lower half. */
806 // Generates vbits[127:64] : vbits[127:64]
807 IRAtom* hi64hi64
808 = assignNew('V', mce, Ity_V128,
809 binop(Iop_InterleaveHI64x2, vbits, vbits));
810 // Generates
811 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
812 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
813 IRAtom* lohi64
814 = mkUifUV128(mce, hi64hi64, vbits);
815 // Generates UifU(vbits[127:64],vbits[63:0])
816 IRAtom* lo64
817 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
818 // Generates
819 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
820 // == PCast-to-I64( vbits[127:0] )
821 IRAtom* res
822 = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
823 return res;
824 }
825
826 /* Else do it the slow way .. */
827 /* First of all, collapse vbits down to a single bit. */
828 tmp1 = NULL;
829 switch (src_ty) {
830 case Ity_I1:
831 tmp1 = vbits;
832 break;
833 case Ity_I8:
834 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
835 break;
836 case Ity_I16:
837 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
838 break;
839 case Ity_I32:
840 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
841 break;
842 case Ity_I64:
843 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
844 break;
845 case Ity_I128: {
846 /* Gah. Chop it in half, OR the halves together, and compare
847 that with zero. */
848 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
849 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
850 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
851 tmp1 = assignNew('V', mce, Ity_I1,
852 unop(Iop_CmpNEZ64, tmp4));
853 break;
854 }
855 case Ity_V128: {
856 /* Chop it in half, OR the halves together, and compare that
857 * with zero.
858 */
859 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits));
860 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits));
861 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
862 tmp1 = assignNew('V', mce, Ity_I1,
863 unop(Iop_CmpNEZ64, tmp4));
864 break;
865 }
866 default:
867 ppIRType(src_ty);
868 VG_(tool_panic)("mkPCastTo(1)");
869 }
870 tl_assert(tmp1);
871 /* Now widen up to the dst type. */
872 switch (dst_ty) {
873 case Ity_I1:
874 return tmp1;
875 case Ity_I8:
876 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
877 case Ity_I16:
878 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
879 case Ity_I32:
880 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
881 case Ity_I64:
882 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
883 case Ity_V128:
884 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
885 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
886 return tmp1;
887 case Ity_I128:
888 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
889 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
890 return tmp1;
891 case Ity_V256:
892 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
893 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
894 tmp1, tmp1));
895 tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
896 tmp1, tmp1));
897 return tmp1;
898 default:
899 ppIRType(dst_ty);
900 VG_(tool_panic)("mkPCastTo(2)");
901 }
902 }
903
904 /* This is a minor variant. It takes an arg of some type and returns
905 a value of the same type. The result consists entirely of Defined
906 (zero) bits except its least significant bit, which is a PCast of
907 the entire argument down to a single bit. */
static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
{
   /* Build a value of type |ty| that is all Defined (zero) bits except
      for the lsb, which is the PCast of the whole of |varg| down to a
      single bit.  Only V128 and I64 are supported. */
   switch (ty) {
      case Ity_V128: {
         /* PCast the whole vector down to an I64 ... */
         IRAtom* pc64 = mkPCastTo(mce, Ity_I64, varg);
         /* ... keep only its lsb, so bits 63:1 become Defined ... */
         IRAtom* lsbOnly
            = assignNew('V', mce, Ity_I64,
                        binop(Iop_And64, pc64, mkU64(1)));
         /* ... and stack a fully Defined I64 on top, giving
            Def--(127)--Def PCast-to-I1(varg). */
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_64HLtoV128,
                                definedOfType(Ity_I64), lsbOnly));
      }
      case Ity_I64: {
         /* PCast to I64, then Define (zero) the top 63 bits. */
         IRAtom* pc64 = mkPCastTo(mce, Ity_I64, varg);
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_And64, pc64, mkU64(1)));
      }
      default:
         /*NOTREACHED*/
         tl_assert(0);
   }
}
939
940 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
941 /*
942 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
943 PCasting to Ity_U1. However, sometimes it is necessary to be more
944 accurate. The insight is that the result is defined if two
945 corresponding bits can be found, one from each argument, so that
946 both bits are defined but are different -- that makes EQ say "No"
947 and NE say "Yes". Hence, we compute an improvement term and DifD
948 it onto the "normal" (UifU) result.
949
950 The result is:
951
952 PCastTo<1> (
953 -- naive version
954 PCastTo<sz>( UifU<sz>(vxx, vyy) )
955
956 `DifD<sz>`
957
958 -- improvement term
959 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
960 )
961
962 where
963 vec contains 0 (defined) bits where the corresponding arg bits
964 are defined but different, and 1 bits otherwise.
965
966 vec = Or<sz>( vxx, // 0 iff bit defined
967 vyy, // 0 iff bit defined
968 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
969 )
970
971 If any bit of vec is 0, the result is defined and so the
972 improvement term should produce 0...0, else it should produce
973 1...1.
974
975 Hence require for the improvement term:
976
977 if vec == 1...1 then 1...1 else 0...0
978 ->
979 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
980
981 This was extensively re-analysed and checked on 6 July 05.
982 */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select size-specific operations.  In the vbits domain, DifD is
      And (defined if defined in either input) and UifU is Or
      (undefined if undefined in either input). */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* The naive interpretation: PCast(vxx `UifU` vyy). */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 (defined) bit exactly where the corresponding arg
      bits are both defined AND different; see the comment above. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* if vec == 1...1 then 1...1 else 0...0 -- i.e. all-undefined
      unless some bit pair proves the comparison's outcome. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* The comparison result is a single bit; PCast down to I1. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
1057
1058
1059 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1060
1061 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1062
1063 CmpORD32S(x,y) = 1<<3 if x <s y
1064 = 1<<2 if x >s y
1065 = 1<<1 if x == y
1066
1067 and similarly the unsigned variant. The default interpretation is:
1068
1069 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1070 & (7<<1)
1071
1072 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1073 are zero and therefore defined (viz, zero).
1074
1075 Also deal with a special case better:
1076
1077 CmpORD32S(x,0)
1078
1079 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1080 will be defined even if the rest of x isn't. In which case we do:
1081
1082 CmpORD32S#(x,x#,0,{impliedly 0}#)
1083 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1084 | (x# >>u 31) << 3 -- LT# = x#[31]
1085
1086 Analogous handling for CmpORD64{S,U}.
1087 */
isZeroU32(IRAtom * e)1088 static Bool isZeroU32 ( IRAtom* e )
1089 {
1090 return
1091 toBool( e->tag == Iex_Const
1092 && e->Iex.Const.con->tag == Ico_U32
1093 && e->Iex.Const.con->Ico.U32 == 0 );
1094 }
1095
isZeroU64(IRAtom * e)1096 static Bool isZeroU64 ( IRAtom* e )
1097 {
1098 return
1099 toBool( e->tag == Iex_Const
1100 && e->Iex.Const.con->tag == Ico_U64
1101 && e->Iex.Const.con->Ico.U64 == 0 );
1102 }
1103
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Select 32- vs 64-bit flavours of everything up front. */
   Bool m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty   = m64 ? Ity_I64   : Ity_I32;
   Int width   = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   /* Debugging aid, normally disabled. */
   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation: signed comparison against literal zero.
         Per the comment above, bit 3 (LT) of the result is just the
         top (sign) bit of xx, so its definedness is the definedness
         of that one bit rather than of the whole of xx. */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            /* PCast(xx#) & (3<<1) -- standard interp for GT#,EQ# */
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            /* (xx# >>u width-1) << 3 -- LT# = xx#[width-1] */
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation: PCast(xx# `UifU` yy#), masked so the
         always-zero result bits are marked defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1174
1175
1176 /*------------------------------------------------------------*/
1177 /*--- Emit a test and complaint if something is undefined. ---*/
1178 /*------------------------------------------------------------*/
1179
1180 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1181
1182
1183 /* Set the annotations on a dirty helper to indicate that the stack
1184 pointer and instruction pointers might be read. This is the
1185 behaviour of all 'emit-a-complaint' style functions we might
1186 call. */
1187
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   /* Mark the two guest-state slices -- SP and IP -- that any
      complaint-style helper may read. */
   Int i;
   di->nFxState = 2;
   for (i = 0; i < 2; i++) {
      di->fxState[i].fx        = Ifx_Read;
      di->fxState[i].nRepeats  = 0;
      di->fxState[i].repeatLen = 0;
   }
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}
1201
1202
1203 /* Check the supplied *original* |atom| for undefinedness, and emit a
1204 complaint if so. Once that happens, mark it as defined. This is
1205 possible because the atom is either a tmp or literal. If it's a
1206 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1207 be defined. In fact as mentioned above, we will have to allocate a
1208 new tmp to carry the new 'defined' shadow value, and update the
1209 original->tmp mapping accordingly; we cannot simply assign a new
1210 value to an existing shadow tmp as this breaks SSAness.
1211
1212 The checks are performed, any resulting complaint emitted, and
1213 |atom|'s shadow temp set to 'defined', ONLY in the case that
1214 |guard| evaluates to True at run-time. If it evaluates to False
1215 then no action is performed. If |guard| is NULL (the usual case)
1216 then it is assumed to be always-true, and hence these actions are
1217 performed unconditionally.
1218
1219 This routine does not generate code to check the definedness of
1220 |guard|. The caller is assumed to have taken care of that already.
1221 */
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are I32; the helper takes a host word, so widen
            on 64-bit hosts. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Pick the complaint helper according to the size of the value
      being checked, and whether an origin is to be reported with it. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* No dedicated helper for these sizes; pass sz explicitly. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                     definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1398
1399
1400 /*------------------------------------------------------------*/
1401 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1402 /*------------------------------------------------------------*/
1403
1404 /* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1406 partially fall into such a region: (offset,size) should either be
1407 completely in such a region or completely not-in such a region.
1408 */
isAlwaysDefd(MCEnv * mce,Int offset,Int size)1409 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1410 {
1411 Int minoffD, maxoffD, i;
1412 Int minoff = offset;
1413 Int maxoff = minoff + size - 1;
1414 tl_assert((minoff & ~0xFFFF) == 0);
1415 tl_assert((maxoff & ~0xFFFF) == 0);
1416
1417 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1418 minoffD = mce->layout->alwaysDefd[i].offset;
1419 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1420 tl_assert((minoffD & ~0xFFFF) == 0);
1421 tl_assert((maxoffD & ~0xFFFF) == 0);
1422
1423 if (maxoff < minoffD || maxoffD < minoff)
1424 continue; /* no overlap */
1425 if (minoff >= minoffD && maxoff <= maxoffD)
1426 return True; /* completely contained in an always-defd section */
1427
1428 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1429 }
1430 return False; /* could not find any containing section */
1431 }
1432
1433
1434 /* Generate into bb suitable actions to shadow this Put. If the state
1435 slice is marked 'always defined', do nothing. Otherwise, write the
1436 supplied V bits to the shadow state. We can pass in either an
1437 original atom or a V-atom, but not both. In the former case the
1438 relevant V-bits are then generated from the original.
1439 We assume here, that the definedness of GUARD has already been checked.
1440 */
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of |atom| (an original) and |vatom| (a shadow) must
      be supplied; in the former case the shadow is derived here. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      /* Shadow state lives at a fixed displacement (total_sizeB)
         beyond the real guest state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1483
1484
1485 /* Return an expression which contains the V bits corresponding to the
1486 given GETI (passed in in pieces).
1487 */
1488 static
do_shadow_PUTI(MCEnv * mce,IRPutI * puti)1489 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1490 {
1491 IRAtom* vatom;
1492 IRType ty, tyS;
1493 Int arrSize;;
1494 IRRegArray* descr = puti->descr;
1495 IRAtom* ix = puti->ix;
1496 Int bias = puti->bias;
1497 IRAtom* atom = puti->data;
1498
1499 // Don't do shadow PUTIs if we're not doing undefined value checking.
1500 // Their absence lets Vex's optimiser remove all the shadow computation
1501 // that they depend on, which includes GETIs of the shadow registers.
1502 if (MC_(clo_mc_level) == 1)
1503 return;
1504
1505 tl_assert(isOriginalAtom(mce,atom));
1506 vatom = expr2vbits( mce, atom );
1507 tl_assert(sameKindedAtoms(atom, vatom));
1508 ty = descr->elemTy;
1509 tyS = shadowTypeV(ty);
1510 arrSize = descr->nElems * sizeofIRType(ty);
1511 tl_assert(ty != Ity_I1);
1512 tl_assert(isOriginalAtom(mce,ix));
1513 complainIfUndefined(mce, ix, NULL);
1514 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1515 /* later: no ... */
1516 /* emit code to emit a complaint if any of the vbits are 1. */
1517 /* complainIfUndefined(mce, atom); */
1518 } else {
1519 /* Do a cloned version of the Put that refers to the shadow
1520 area. */
1521 IRRegArray* new_descr
1522 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1523 tyS, descr->nElems);
1524 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1525 }
1526 }
1527
1528
1529 /* Return an expression which contains the V bits corresponding to the
1530 given GET (passed in in pieces).
1531 */
1532 static
shadow_GET(MCEnv * mce,Int offset,IRType ty)1533 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1534 {
1535 IRType tyS = shadowTypeV(ty);
1536 tl_assert(ty != Ity_I1);
1537 tl_assert(ty != Ity_I128);
1538 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1539 /* Always defined, return all zeroes of the relevant type */
1540 return definedOfType(tyS);
1541 } else {
1542 /* return a cloned version of the Get that refers to the shadow
1543 area. */
1544 /* FIXME: this isn't an atom! */
1545 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1546 }
1547 }
1548
1549
1550 /* Return an expression which contains the V bits corresponding to the
1551 given GETI (passed in in pieces).
1552 */
1553 static
shadow_GETI(MCEnv * mce,IRRegArray * descr,IRAtom * ix,Int bias)1554 IRExpr* shadow_GETI ( MCEnv* mce,
1555 IRRegArray* descr, IRAtom* ix, Int bias )
1556 {
1557 IRType ty = descr->elemTy;
1558 IRType tyS = shadowTypeV(ty);
1559 Int arrSize = descr->nElems * sizeofIRType(ty);
1560 tl_assert(ty != Ity_I1);
1561 tl_assert(isOriginalAtom(mce,ix));
1562 complainIfUndefined(mce, ix, NULL);
1563 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1564 /* Always defined, return all zeroes of the relevant type */
1565 return definedOfType(tyS);
1566 } else {
1567 /* return a cloned version of the Get that refers to the shadow
1568 area. */
1569 IRRegArray* new_descr
1570 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1571 tyS, descr->nElems);
1572 return IRExpr_GetI( new_descr, ix, bias );
1573 }
1574 }
1575
1576
1577 /*------------------------------------------------------------*/
1578 /*--- Generating approximations for unknown operations, ---*/
1579 /*--- using lazy-propagate semantics ---*/
1580 /*------------------------------------------------------------*/
1581
1582 /* Lazy propagation of undefinedness from two values, resulting in the
1583 specified shadow type.
1584 */
1585 static
mkLazy2(MCEnv * mce,IRType finalVty,IRAtom * va1,IRAtom * va2)1586 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1587 {
1588 IRAtom* at;
1589 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1590 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1591 tl_assert(isShadowAtom(mce,va1));
1592 tl_assert(isShadowAtom(mce,va2));
1593
1594 /* The general case is inefficient because PCast is an expensive
1595 operation. Here are some special cases which use PCast only
1596 once rather than twice. */
1597
1598 /* I64 x I64 -> I64 */
1599 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1600 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1601 at = mkUifU(mce, Ity_I64, va1, va2);
1602 at = mkPCastTo(mce, Ity_I64, at);
1603 return at;
1604 }
1605
1606 /* I64 x I64 -> I32 */
1607 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1608 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1609 at = mkUifU(mce, Ity_I64, va1, va2);
1610 at = mkPCastTo(mce, Ity_I32, at);
1611 return at;
1612 }
1613
1614 /* I32 x I32 -> I32 */
1615 if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) {
1616 if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n");
1617 at = mkUifU(mce, Ity_I32, va1, va2);
1618 at = mkPCastTo(mce, Ity_I32, at);
1619 return at;
1620 }
1621
1622 if (0) {
1623 VG_(printf)("mkLazy2 ");
1624 ppIRType(t1);
1625 VG_(printf)("_");
1626 ppIRType(t2);
1627 VG_(printf)("_");
1628 ppIRType(finalVty);
1629 VG_(printf)("\n");
1630 }
1631
1632 /* General case: force everything via 32-bit intermediaries. */
1633 at = mkPCastTo(mce, Ity_I32, va1);
1634 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1635 at = mkPCastTo(mce, finalVty, at);
1636 return at;
1637 }
1638
1639
1640 /* 3-arg version of the above. */
1641 static
mkLazy3(MCEnv * mce,IRType finalVty,IRAtom * va1,IRAtom * va2,IRAtom * va3)1642 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1643 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1644 {
1645 IRAtom* at;
1646 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1647 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1648 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1649 tl_assert(isShadowAtom(mce,va1));
1650 tl_assert(isShadowAtom(mce,va2));
1651 tl_assert(isShadowAtom(mce,va3));
1652
1653 /* The general case is inefficient because PCast is an expensive
1654 operation. Here are some special cases which use PCast only
1655 twice rather than three times. */
1656
1657 /* I32 x I64 x I64 -> I64 */
1658 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1659 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1660 && finalVty == Ity_I64) {
1661 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1662 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1663 mode indication which is fully defined, this should get
1664 folded out later. */
1665 at = mkPCastTo(mce, Ity_I64, va1);
1666 /* Now fold in 2nd and 3rd args. */
1667 at = mkUifU(mce, Ity_I64, at, va2);
1668 at = mkUifU(mce, Ity_I64, at, va3);
1669 /* and PCast once again. */
1670 at = mkPCastTo(mce, Ity_I64, at);
1671 return at;
1672 }
1673
1674 /* I32 x I8 x I64 -> I64 */
1675 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1676 && finalVty == Ity_I64) {
1677 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1678 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1679 * rounding mode indication which is fully defined, this should
1680 * get folded out later.
1681 */
1682 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1683 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1684 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1685 at = mkUifU(mce, Ity_I64, at, va3);
1686 /* and PCast once again. */
1687 at = mkPCastTo(mce, Ity_I64, at);
1688 return at;
1689 }
1690
1691 /* I32 x I64 x I64 -> I32 */
1692 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1693 && finalVty == Ity_I32) {
1694 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1695 at = mkPCastTo(mce, Ity_I64, va1);
1696 at = mkUifU(mce, Ity_I64, at, va2);
1697 at = mkUifU(mce, Ity_I64, at, va3);
1698 at = mkPCastTo(mce, Ity_I32, at);
1699 return at;
1700 }
1701
1702 /* I32 x I32 x I32 -> I32 */
1703 /* 32-bit FP idiom, as (eg) happens on ARM */
1704 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1705 && finalVty == Ity_I32) {
1706 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1707 at = va1;
1708 at = mkUifU(mce, Ity_I32, at, va2);
1709 at = mkUifU(mce, Ity_I32, at, va3);
1710 at = mkPCastTo(mce, Ity_I32, at);
1711 return at;
1712 }
1713
1714 /* I32 x I128 x I128 -> I128 */
1715 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1716 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1717 && finalVty == Ity_I128) {
1718 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1719 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1720 mode indication which is fully defined, this should get
1721 folded out later. */
1722 at = mkPCastTo(mce, Ity_I128, va1);
1723 /* Now fold in 2nd and 3rd args. */
1724 at = mkUifU(mce, Ity_I128, at, va2);
1725 at = mkUifU(mce, Ity_I128, at, va3);
1726 /* and PCast once again. */
1727 at = mkPCastTo(mce, Ity_I128, at);
1728 return at;
1729 }
1730
1731 /* I32 x I8 x I128 -> I128 */
1732 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1733 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1734 && finalVty == Ity_I128) {
1735 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1736 /* Use I64 as an intermediate type, which means PCasting all 3
1737 args to I64 to start with. 1st arg is typically a rounding
1738 mode indication which is fully defined, so we hope that it
1739 will get folded out later. */
1740 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1741 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1742 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1743 /* Now UifU all three together. */
1744 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1745 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3)
1746 /* and PCast once again. */
1747 at = mkPCastTo(mce, Ity_I128, at);
1748 return at;
1749 }
1750 if (1) {
1751 VG_(printf)("mkLazy3: ");
1752 ppIRType(t1);
1753 VG_(printf)(" x ");
1754 ppIRType(t2);
1755 VG_(printf)(" x ");
1756 ppIRType(t3);
1757 VG_(printf)(" -> ");
1758 ppIRType(finalVty);
1759 VG_(printf)("\n");
1760 }
1761
1762 tl_assert(0);
1763 /* General case: force everything via 32-bit intermediaries. */
1764 /*
1765 at = mkPCastTo(mce, Ity_I32, va1);
1766 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1767 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1768 at = mkPCastTo(mce, finalVty, at);
1769 return at;
1770 */
1771 }
1772
1773
1774 /* 4-arg version of the above. */
1775 static
mkLazy4(MCEnv * mce,IRType finalVty,IRAtom * va1,IRAtom * va2,IRAtom * va3,IRAtom * va4)1776 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1777 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1778 {
1779 IRAtom* at;
1780 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1781 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1782 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1783 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1784 tl_assert(isShadowAtom(mce,va1));
1785 tl_assert(isShadowAtom(mce,va2));
1786 tl_assert(isShadowAtom(mce,va3));
1787 tl_assert(isShadowAtom(mce,va4));
1788
1789 /* The general case is inefficient because PCast is an expensive
1790 operation. Here are some special cases which use PCast only
1791 twice rather than three times. */
1792
1793 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1794
1795 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
1796 && finalVty == Ity_I128) {
1797 if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
1798 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1799 mode indication which is fully defined, this should get
1800 folded out later. */
1801 at = mkPCastTo(mce, Ity_I128, va1);
1802 /* Now fold in 2nd, 3rd, 4th args. */
1803 at = mkUifU(mce, Ity_I128, at, va2);
1804 at = mkUifU(mce, Ity_I128, at, va3);
1805 at = mkUifU(mce, Ity_I128, at, va4);
1806 /* and PCast once again. */
1807 at = mkPCastTo(mce, Ity_I128, at);
1808 return at;
1809 }
1810
1811 /* I32 x I64 x I64 x I64 -> I64 */
1812 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1813 && finalVty == Ity_I64) {
1814 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1815 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1816 mode indication which is fully defined, this should get
1817 folded out later. */
1818 at = mkPCastTo(mce, Ity_I64, va1);
1819 /* Now fold in 2nd, 3rd, 4th args. */
1820 at = mkUifU(mce, Ity_I64, at, va2);
1821 at = mkUifU(mce, Ity_I64, at, va3);
1822 at = mkUifU(mce, Ity_I64, at, va4);
1823 /* and PCast once again. */
1824 at = mkPCastTo(mce, Ity_I64, at);
1825 return at;
1826 }
1827 /* I32 x I32 x I32 x I32 -> I32 */
1828 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1829 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1830 && finalVty == Ity_I32) {
1831 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1832 at = va1;
1833 /* Now fold in 2nd, 3rd, 4th args. */
1834 at = mkUifU(mce, Ity_I32, at, va2);
1835 at = mkUifU(mce, Ity_I32, at, va3);
1836 at = mkUifU(mce, Ity_I32, at, va4);
1837 at = mkPCastTo(mce, Ity_I32, at);
1838 return at;
1839 }
1840
1841 if (1) {
1842 VG_(printf)("mkLazy4: ");
1843 ppIRType(t1);
1844 VG_(printf)(" x ");
1845 ppIRType(t2);
1846 VG_(printf)(" x ");
1847 ppIRType(t3);
1848 VG_(printf)(" x ");
1849 ppIRType(t4);
1850 VG_(printf)(" -> ");
1851 ppIRType(finalVty);
1852 VG_(printf)("\n");
1853 }
1854
1855 tl_assert(0);
1856 }
1857
1858
1859 /* Do the lazy propagation game from a null-terminated vector of
1860 atoms. This is presumably the arguments to a helper call, so the
1861 IRCallee info is also supplied in order that we can know which
1862 arguments should be ignored (via the .mcx_mask field).
1863 */
1864 static
mkLazyN(MCEnv * mce,IRAtom ** exprvec,IRType finalVtype,IRCallee * cee)1865 IRAtom* mkLazyN ( MCEnv* mce,
1866 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1867 {
1868 Int i;
1869 IRAtom* here;
1870 IRAtom* curr;
1871 IRType mergeTy;
1872 Bool mergeTy64 = True;
1873
1874 /* Decide on the type of the merge intermediary. If all relevant
1875 args are I64, then it's I64. In all other circumstances, use
1876 I32. */
1877 for (i = 0; exprvec[i]; i++) {
1878 tl_assert(i < 32);
1879 tl_assert(isOriginalAtom(mce, exprvec[i]));
1880 if (cee->mcx_mask & (1<<i))
1881 continue;
1882 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1883 mergeTy64 = False;
1884 }
1885
1886 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1887 curr = definedOfType(mergeTy);
1888
1889 for (i = 0; exprvec[i]; i++) {
1890 tl_assert(i < 32);
1891 tl_assert(isOriginalAtom(mce, exprvec[i]));
1892 /* Only take notice of this arg if the callee's mc-exclusion
1893 mask does not say it is to be excluded. */
1894 if (cee->mcx_mask & (1<<i)) {
1895 /* the arg is to be excluded from definedness checking. Do
1896 nothing. */
1897 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1898 } else {
1899 /* calculate the arg's definedness, and pessimistically merge
1900 it in. */
1901 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1902 curr = mergeTy64
1903 ? mkUifU64(mce, here, curr)
1904 : mkUifU32(mce, here, curr);
1905 }
1906 }
1907 return mkPCastTo(mce, finalVtype, curr );
1908 }
1909
1910
1911 /*------------------------------------------------------------*/
1912 /*--- Generating expensive sequences for exact carry-chain ---*/
1913 /*--- propagation in add/sub and related operations. ---*/
1914 /*------------------------------------------------------------*/
1915
1916 static
expensiveAddSub(MCEnv * mce,Bool add,IRType ty,IRAtom * qaa,IRAtom * qbb,IRAtom * aa,IRAtom * bb)1917 IRAtom* expensiveAddSub ( MCEnv* mce,
1918 Bool add,
1919 IRType ty,
1920 IRAtom* qaa, IRAtom* qbb,
1921 IRAtom* aa, IRAtom* bb )
1922 {
1923 IRAtom *a_min, *b_min, *a_max, *b_max;
1924 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
1925
1926 tl_assert(isShadowAtom(mce,qaa));
1927 tl_assert(isShadowAtom(mce,qbb));
1928 tl_assert(isOriginalAtom(mce,aa));
1929 tl_assert(isOriginalAtom(mce,bb));
1930 tl_assert(sameKindedAtoms(qaa,aa));
1931 tl_assert(sameKindedAtoms(qbb,bb));
1932
1933 switch (ty) {
1934 case Ity_I32:
1935 opAND = Iop_And32;
1936 opOR = Iop_Or32;
1937 opXOR = Iop_Xor32;
1938 opNOT = Iop_Not32;
1939 opADD = Iop_Add32;
1940 opSUB = Iop_Sub32;
1941 break;
1942 case Ity_I64:
1943 opAND = Iop_And64;
1944 opOR = Iop_Or64;
1945 opXOR = Iop_Xor64;
1946 opNOT = Iop_Not64;
1947 opADD = Iop_Add64;
1948 opSUB = Iop_Sub64;
1949 break;
1950 default:
1951 VG_(tool_panic)("expensiveAddSub");
1952 }
1953
1954 // a_min = aa & ~qaa
1955 a_min = assignNew('V', mce,ty,
1956 binop(opAND, aa,
1957 assignNew('V', mce,ty, unop(opNOT, qaa))));
1958
1959 // b_min = bb & ~qbb
1960 b_min = assignNew('V', mce,ty,
1961 binop(opAND, bb,
1962 assignNew('V', mce,ty, unop(opNOT, qbb))));
1963
1964 // a_max = aa | qaa
1965 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1966
1967 // b_max = bb | qbb
1968 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1969
1970 if (add) {
1971 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1972 return
1973 assignNew('V', mce,ty,
1974 binop( opOR,
1975 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1976 assignNew('V', mce,ty,
1977 binop( opXOR,
1978 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1979 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1980 )
1981 )
1982 )
1983 );
1984 } else {
1985 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min))
1986 return
1987 assignNew('V', mce,ty,
1988 binop( opOR,
1989 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1990 assignNew('V', mce,ty,
1991 binop( opXOR,
1992 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1993 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1994 )
1995 )
1996 )
1997 );
1998 }
1999
2000 }
2001
2002
2003 static
expensiveCountTrailingZeroes(MCEnv * mce,IROp czop,IRAtom * atom,IRAtom * vatom)2004 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
2005 IRAtom* atom, IRAtom* vatom )
2006 {
2007 IRType ty;
2008 IROp xorOp, subOp, andOp;
2009 IRExpr *one;
2010 IRAtom *improver, *improved;
2011 tl_assert(isShadowAtom(mce,vatom));
2012 tl_assert(isOriginalAtom(mce,atom));
2013 tl_assert(sameKindedAtoms(atom,vatom));
2014
2015 switch (czop) {
2016 case Iop_Ctz32:
2017 ty = Ity_I32;
2018 xorOp = Iop_Xor32;
2019 subOp = Iop_Sub32;
2020 andOp = Iop_And32;
2021 one = mkU32(1);
2022 break;
2023 case Iop_Ctz64:
2024 ty = Ity_I64;
2025 xorOp = Iop_Xor64;
2026 subOp = Iop_Sub64;
2027 andOp = Iop_And64;
2028 one = mkU64(1);
2029 break;
2030 default:
2031 ppIROp(czop);
2032 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
2033 }
2034
2035 // improver = atom ^ (atom - 1)
2036 //
2037 // That is, improver has its low ctz(atom) bits equal to one;
2038 // higher bits (if any) equal to zero.
2039 improver = assignNew('V', mce,ty,
2040 binop(xorOp,
2041 atom,
2042 assignNew('V', mce, ty,
2043 binop(subOp, atom, one))));
2044
2045 // improved = vatom & improver
2046 //
2047 // That is, treat any V bits above the first ctz(atom) bits as
2048 // "defined".
2049 improved = assignNew('V', mce, ty,
2050 binop(andOp, vatom, improver));
2051
2052 // Return pessimizing cast of improved.
2053 return mkPCastTo(mce, ty, improved);
2054 }
2055
2056
2057 /*------------------------------------------------------------*/
2058 /*--- Scalar shifts. ---*/
2059 /*------------------------------------------------------------*/
2060
2061 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2062 idea is to shift the definedness bits by the original shift amount.
2063 This introduces 0s ("defined") in new positions for left shifts and
2064 unsigned right shifts, and copies the top definedness bit for
2065 signed right shifts. So, conveniently, applying the original shift
2066 operator to the definedness bits for the left arg is exactly the
2067 right thing to do:
2068
2069 (qaa << bb)
2070
2071 However if the shift amount is undefined then the whole result
2072 is undefined. Hence need:
2073
2074 (qaa << bb) `UifU` PCast(qbb)
2075
2076 If the shift amount bb is a literal than qbb will say 'all defined'
2077 and the UifU and PCast will get folded out by post-instrumentation
2078 optimisation.
2079 */
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   IRAtom *shiftedV, *amountV;
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   /* Shift the V bits of the left arg by the concrete shift amount,
      then UifU in the pessimised shadow of the amount itself: an
      undefined shift amount poisons the entire result.  For a literal
      amount, qbb is all-defined and the UifU/PCast fold away. */
   shiftedV = assignNew('V', mce, ty, binop(original_op, qaa, bb));
   amountV  = mkPCastTo(mce, ty, qbb);
   return assignNew('V', mce, ty,
                    mkUifU(mce, ty, shiftedV, amountV));
}
2101
2102
2103 /*------------------------------------------------------------*/
2104 /*--- Helpers for dealing with vector primops. ---*/
2105 /*------------------------------------------------------------*/
2106
2107 /* Vector pessimisation -- pessimise within each lane individually. */
2108
/* Pessimise 16 x 8-bit lanes: any lane with a set (undefined) bit
   becomes all-ones, otherwise all-zeroes. */
static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ8x16, at);
   return assignNew('V', mce, Ity_V128, perLane);
}
2113
/* Pessimise 8 x 16-bit lanes of a V128 shadow. */
static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ16x8, at);
   return assignNew('V', mce, Ity_V128, perLane);
}
2118
/* Pessimise 4 x 32-bit lanes of a V128 shadow. */
static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ32x4, at);
   return assignNew('V', mce, Ity_V128, perLane);
}
2123
/* Pessimise 2 x 64-bit lanes of a V128 shadow. */
static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ64x2, at);
   return assignNew('V', mce, Ity_V128, perLane);
}
2128
/* Pessimise 4 x 64-bit lanes of a V256 shadow. */
static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ64x4, at);
   return assignNew('V', mce, Ity_V256, perLane);
}
2133
/* Pessimise 8 x 32-bit lanes of a V256 shadow. */
static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ32x8, at);
   return assignNew('V', mce, Ity_V256, perLane);
}
2138
/* Pessimise 2 x 32-bit lanes of an I64 shadow. */
static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ32x2, at);
   return assignNew('V', mce, Ity_I64, perLane);
}
2143
/* Pessimise 16 x 16-bit lanes of a V256 shadow. */
static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ16x16, at);
   return assignNew('V', mce, Ity_V256, perLane);
}
2148
/* Pessimise 4 x 16-bit lanes of an I64 shadow. */
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ16x4, at);
   return assignNew('V', mce, Ity_I64, perLane);
}
2153
/* Pessimise 32 x 8-bit lanes of a V256 shadow. */
static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ8x32, at);
   return assignNew('V', mce, Ity_V256, perLane);
}
2158
/* Pessimise 8 x 8-bit lanes of an I64 shadow. */
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ8x8, at);
   return assignNew('V', mce, Ity_I64, perLane);
}
2163
/* Pessimise 2 x 16-bit lanes of an I32 shadow. */
static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ16x2, at);
   return assignNew('V', mce, Ity_I32, perLane);
}
2168
/* Pessimise 4 x 8-bit lanes of an I32 shadow. */
static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   IRAtom* perLane = unop(Iop_CmpNEZ8x4, at);
   return assignNew('V', mce, Ity_I32, perLane);
}
2173
2174
2175 /* Here's a simple scheme capable of handling ops derived from SSE1
2176 code and while only generating ops that can be efficiently
2177 implemented in SSE1. */
2178
2179 /* All-lanes versions are straightforward:
2180
2181 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2182
2183 unary32Fx4(x,y) ==> PCast32x4(x#)
2184
2185 Lowest-lane-only versions are more complex:
2186
2187 binary32F0x4(x,y) ==> SetV128lo32(
2188 x#,
2189 PCast32(V128to32(UifUV128(x#,y#)))
2190 )
2191
2192 This is perhaps not so obvious. In particular, it's faster to
2193 do a V128-bit UifU and then take the bottom 32 bits than the more
2194 obvious scheme of taking the bottom 32 bits of each operand
2195 and doing a 32-bit UifU. Basically since UifU is fast and
2196 chopping lanes off vector values is slow.
2197
2198 Finally:
2199
2200 unary32F0x4(x) ==> SetV128lo32(
2201 x#,
2202 PCast32(V128to32(x#))
2203 )
2204
2205 Where:
2206
2207 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2208 PCast32x4(v#) = CmpNEZ32x4(v#)
2209 */
2210
2211 static
binary32Fx4(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2212 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2213 {
2214 IRAtom* at;
2215 tl_assert(isShadowAtom(mce, vatomX));
2216 tl_assert(isShadowAtom(mce, vatomY));
2217 at = mkUifUV128(mce, vatomX, vatomY);
2218 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2219 return at;
2220 }
2221
2222 static
unary32Fx4(MCEnv * mce,IRAtom * vatomX)2223 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2224 {
2225 IRAtom* at;
2226 tl_assert(isShadowAtom(mce, vatomX));
2227 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2228 return at;
2229 }
2230
2231 static
binary32F0x4(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2232 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2233 {
2234 IRAtom* at;
2235 tl_assert(isShadowAtom(mce, vatomX));
2236 tl_assert(isShadowAtom(mce, vatomY));
2237 at = mkUifUV128(mce, vatomX, vatomY);
2238 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2239 at = mkPCastTo(mce, Ity_I32, at);
2240 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2241 return at;
2242 }
2243
2244 static
unary32F0x4(MCEnv * mce,IRAtom * vatomX)2245 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2246 {
2247 IRAtom* at;
2248 tl_assert(isShadowAtom(mce, vatomX));
2249 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2250 at = mkPCastTo(mce, Ity_I32, at);
2251 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2252 return at;
2253 }
2254
2255 /* --- ... and ... 64Fx2 versions of the same ... --- */
2256
2257 static
binary64Fx2(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2258 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2259 {
2260 IRAtom* at;
2261 tl_assert(isShadowAtom(mce, vatomX));
2262 tl_assert(isShadowAtom(mce, vatomY));
2263 at = mkUifUV128(mce, vatomX, vatomY);
2264 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2265 return at;
2266 }
2267
2268 static
unary64Fx2(MCEnv * mce,IRAtom * vatomX)2269 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2270 {
2271 IRAtom* at;
2272 tl_assert(isShadowAtom(mce, vatomX));
2273 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2274 return at;
2275 }
2276
2277 static
binary64F0x2(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2278 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2279 {
2280 IRAtom* at;
2281 tl_assert(isShadowAtom(mce, vatomX));
2282 tl_assert(isShadowAtom(mce, vatomY));
2283 at = mkUifUV128(mce, vatomX, vatomY);
2284 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2285 at = mkPCastTo(mce, Ity_I64, at);
2286 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2287 return at;
2288 }
2289
2290 static
unary64F0x2(MCEnv * mce,IRAtom * vatomX)2291 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2292 {
2293 IRAtom* at;
2294 tl_assert(isShadowAtom(mce, vatomX));
2295 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2296 at = mkPCastTo(mce, Ity_I64, at);
2297 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2298 return at;
2299 }
2300
2301 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2302
2303 static
binary32Fx2(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2304 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2305 {
2306 IRAtom* at;
2307 tl_assert(isShadowAtom(mce, vatomX));
2308 tl_assert(isShadowAtom(mce, vatomY));
2309 at = mkUifU64(mce, vatomX, vatomY);
2310 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2311 return at;
2312 }
2313
2314 static
unary32Fx2(MCEnv * mce,IRAtom * vatomX)2315 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2316 {
2317 IRAtom* at;
2318 tl_assert(isShadowAtom(mce, vatomX));
2319 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2320 return at;
2321 }
2322
2323 /* --- ... and ... 64Fx4 versions of the same ... --- */
2324
2325 static
binary64Fx4(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2326 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2327 {
2328 IRAtom* at;
2329 tl_assert(isShadowAtom(mce, vatomX));
2330 tl_assert(isShadowAtom(mce, vatomY));
2331 at = mkUifUV256(mce, vatomX, vatomY);
2332 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2333 return at;
2334 }
2335
2336 static
unary64Fx4(MCEnv * mce,IRAtom * vatomX)2337 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2338 {
2339 IRAtom* at;
2340 tl_assert(isShadowAtom(mce, vatomX));
2341 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2342 return at;
2343 }
2344
2345 /* --- ... and ... 32Fx8 versions of the same ... --- */
2346
2347 static
binary32Fx8(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)2348 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2349 {
2350 IRAtom* at;
2351 tl_assert(isShadowAtom(mce, vatomX));
2352 tl_assert(isShadowAtom(mce, vatomY));
2353 at = mkUifUV256(mce, vatomX, vatomY);
2354 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2355 return at;
2356 }
2357
2358 static
unary32Fx8(MCEnv * mce,IRAtom * vatomX)2359 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2360 {
2361 IRAtom* at;
2362 tl_assert(isShadowAtom(mce, vatomX));
2363 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2364 return at;
2365 }
2366
2367 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2368
2369 static
binary64Fx2_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX,IRAtom * vatomY)2370 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2371 IRAtom* vatomX, IRAtom* vatomY )
2372 {
2373 /* This is the same as binary64Fx2, except that we subsequently
2374 pessimise vRM (definedness of the rounding mode), widen to 128
2375 bits and UifU it into the result. As with the scalar cases, if
2376 the RM is a constant then it is defined and so this extra bit
2377 will get constant-folded out later. */
2378 // "do" the vector args
2379 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2380 // PCast the RM, and widen it to 128 bits
2381 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2382 // Roll it into the result
2383 t1 = mkUifUV128(mce, t1, t2);
2384 return t1;
2385 }
2386
2387 /* --- ... and ... 32Fx4 versions of the same --- */
2388
2389 static
binary32Fx4_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX,IRAtom * vatomY)2390 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2391 IRAtom* vatomX, IRAtom* vatomY )
2392 {
2393 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2394 // PCast the RM, and widen it to 128 bits
2395 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2396 // Roll it into the result
2397 t1 = mkUifUV128(mce, t1, t2);
2398 return t1;
2399 }
2400
2401 /* --- ... and ... 64Fx4 versions of the same --- */
2402
2403 static
binary64Fx4_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX,IRAtom * vatomY)2404 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2405 IRAtom* vatomX, IRAtom* vatomY )
2406 {
2407 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2408 // PCast the RM, and widen it to 256 bits
2409 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2410 // Roll it into the result
2411 t1 = mkUifUV256(mce, t1, t2);
2412 return t1;
2413 }
2414
2415 /* --- ... and ... 32Fx8 versions of the same --- */
2416
2417 static
binary32Fx8_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX,IRAtom * vatomY)2418 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2419 IRAtom* vatomX, IRAtom* vatomY )
2420 {
2421 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2422 // PCast the RM, and widen it to 256 bits
2423 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2424 // Roll it into the result
2425 t1 = mkUifUV256(mce, t1, t2);
2426 return t1;
2427 }
2428
2429 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2430
2431 static
unary64Fx2_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX)2432 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2433 {
2434 /* Same scheme as binary64Fx2_w_rm. */
2435 // "do" the vector arg
2436 IRAtom* t1 = unary64Fx2(mce, vatomX);
2437 // PCast the RM, and widen it to 128 bits
2438 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2439 // Roll it into the result
2440 t1 = mkUifUV128(mce, t1, t2);
2441 return t1;
2442 }
2443
2444 /* --- ... and ... 32Fx4 versions of the same --- */
2445
2446 static
unary32Fx4_w_rm(MCEnv * mce,IRAtom * vRM,IRAtom * vatomX)2447 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2448 {
2449 /* Same scheme as unary32Fx4_w_rm. */
2450 IRAtom* t1 = unary32Fx4(mce, vatomX);
2451 // PCast the RM, and widen it to 128 bits
2452 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2453 // Roll it into the result
2454 t1 = mkUifUV128(mce, t1, t2);
2455 return t1;
2456 }
2457
2458
2459 /* --- --- Vector saturated narrowing --- --- */
2460
2461 /* We used to do something very clever here, but on closer inspection
2462 (2011-Jun-15), and in particular bug #279698, it turns out to be
2463 wrong. Part of the problem came from the fact that for a long
2464 time, the IR primops to do with saturated narrowing were
2465 underspecified and managed to confuse multiple cases which needed
2466 to be separate: the op names had a signedness qualifier, but in
2467 fact the source and destination signednesses needed to be specified
2468 independently, so the op names really need two independent
2469 signedness specifiers.
2470
2471 As of 2011-Jun-15 (ish) the underspecification was sorted out
2472 properly. The incorrect instrumentation remained, though. That
2473 has now (2011-Oct-22) been fixed.
2474
2475 What we now do is simple:
2476
2477 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2478 number of lanes, X is the source lane width and signedness, and Y
2479 is the destination lane width and signedness. In all cases the
2480 destination lane width is half the source lane width, so the names
2481 have a bit of redundancy, but are at least easy to read.
2482
2483 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2484 to unsigned 16s.
2485
2486 Let Vanilla(OP) be a function that takes OP, one of these
2487 saturating narrowing ops, and produces the same "shaped" narrowing
2488 op which is not saturating, but merely dumps the most significant
2489 bits. "same shape" means that the lane numbers and widths are the
2490 same as with OP.
2491
2492 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2493 = Iop_NarrowBin32to16x8,
2494 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2495 dumping the top half of each lane.
2496
2497 So, with that in place, the scheme is simple, and it is simple to
2498 pessimise each lane individually and then apply Vanilla(OP) so as
2499 to get the result in the right "shape". If the original OP is
2500 QNarrowBinXtoYxZ then we produce
2501
2502 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2503
2504 or for the case when OP is unary (Iop_QNarrowUn*)
2505
2506 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2507 */
2508 static
vanillaNarrowingOpOfShape(IROp qnarrowOp)2509 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2510 {
2511 switch (qnarrowOp) {
2512 /* Binary: (128, 128) -> 128 */
2513 case Iop_QNarrowBin16Sto8Ux16:
2514 case Iop_QNarrowBin16Sto8Sx16:
2515 case Iop_QNarrowBin16Uto8Ux16:
2516 case Iop_QNarrowBin64Sto32Sx4:
2517 case Iop_QNarrowBin64Uto32Ux4:
2518 return Iop_NarrowBin16to8x16;
2519 case Iop_QNarrowBin32Sto16Ux8:
2520 case Iop_QNarrowBin32Sto16Sx8:
2521 case Iop_QNarrowBin32Uto16Ux8:
2522 return Iop_NarrowBin32to16x8;
2523 /* Binary: (64, 64) -> 64 */
2524 case Iop_QNarrowBin32Sto16Sx4:
2525 return Iop_NarrowBin32to16x4;
2526 case Iop_QNarrowBin16Sto8Ux8:
2527 case Iop_QNarrowBin16Sto8Sx8:
2528 return Iop_NarrowBin16to8x8;
2529 /* Unary: 128 -> 64 */
2530 case Iop_QNarrowUn64Uto32Ux2:
2531 case Iop_QNarrowUn64Sto32Sx2:
2532 case Iop_QNarrowUn64Sto32Ux2:
2533 return Iop_NarrowUn64to32x2;
2534 case Iop_QNarrowUn32Uto16Ux4:
2535 case Iop_QNarrowUn32Sto16Sx4:
2536 case Iop_QNarrowUn32Sto16Ux4:
2537 case Iop_F32toF16x4:
2538 return Iop_NarrowUn32to16x4;
2539 case Iop_QNarrowUn16Uto8Ux8:
2540 case Iop_QNarrowUn16Sto8Sx8:
2541 case Iop_QNarrowUn16Sto8Ux8:
2542 return Iop_NarrowUn16to8x8;
2543 default:
2544 ppIROp(qnarrowOp);
2545 VG_(tool_panic)("vanillaNarrowOpOfShape");
2546 }
2547 }
2548
2549 static
vectorNarrowBinV128(MCEnv * mce,IROp narrow_op,IRAtom * vatom1,IRAtom * vatom2)2550 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2551 IRAtom* vatom1, IRAtom* vatom2)
2552 {
2553 IRAtom *at1, *at2, *at3;
2554 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2555 switch (narrow_op) {
2556 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2557 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2558 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2559 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2560 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2561 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2562 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2563 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2564 default: VG_(tool_panic)("vectorNarrowBinV128");
2565 }
2566 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2567 tl_assert(isShadowAtom(mce,vatom1));
2568 tl_assert(isShadowAtom(mce,vatom2));
2569 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2570 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2571 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2572 return at3;
2573 }
2574
2575 static
vectorNarrowBin64(MCEnv * mce,IROp narrow_op,IRAtom * vatom1,IRAtom * vatom2)2576 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2577 IRAtom* vatom1, IRAtom* vatom2)
2578 {
2579 IRAtom *at1, *at2, *at3;
2580 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2581 switch (narrow_op) {
2582 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2583 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2584 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2585 default: VG_(tool_panic)("vectorNarrowBin64");
2586 }
2587 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2588 tl_assert(isShadowAtom(mce,vatom1));
2589 tl_assert(isShadowAtom(mce,vatom2));
2590 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2591 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2592 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2593 return at3;
2594 }
2595
2596 static
vectorNarrowUnV128(MCEnv * mce,IROp narrow_op,IRAtom * vatom1)2597 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2598 IRAtom* vatom1)
2599 {
2600 IRAtom *at1, *at2;
2601 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2602 tl_assert(isShadowAtom(mce,vatom1));
2603 /* For vanilla narrowing (non-saturating), we can just apply
2604 the op directly to the V bits. */
2605 switch (narrow_op) {
2606 case Iop_NarrowUn16to8x8:
2607 case Iop_NarrowUn32to16x4:
2608 case Iop_NarrowUn64to32x2:
2609 case Iop_F32toF16x4:
2610 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2611 return at1;
2612 default:
2613 break; /* Do Plan B */
2614 }
2615 /* Plan B: for ops that involve a saturation operation on the args,
2616 we must PCast before the vanilla narrow. */
2617 switch (narrow_op) {
2618 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2619 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2620 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2621 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2622 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2623 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2624 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2625 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2626 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2627 default: VG_(tool_panic)("vectorNarrowUnV128");
2628 }
2629 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2630 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2631 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2632 return at2;
2633 }
2634
2635 static
vectorWidenI64(MCEnv * mce,IROp longen_op,IRAtom * vatom1)2636 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2637 IRAtom* vatom1)
2638 {
2639 IRAtom *at1, *at2;
2640 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2641 switch (longen_op) {
2642 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2643 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2644 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2645 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2646 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2647 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2648 case Iop_F16toF32x4: pcast = mkPCast32x4; break;
2649 default: VG_(tool_panic)("vectorWidenI64");
2650 }
2651 tl_assert(isShadowAtom(mce,vatom1));
2652 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2653 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2654 return at2;
2655 }
2656
2657
2658 /* --- --- Vector integer arithmetic --- --- */
2659
2660 /* Simple ... UifU the args and per-lane pessimise the results. */
2661
2662 /* --- V256-bit versions --- */
2663
2664 static
binary8Ix32(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2665 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2666 {
2667 IRAtom* at;
2668 at = mkUifUV256(mce, vatom1, vatom2);
2669 at = mkPCast8x32(mce, at);
2670 return at;
2671 }
2672
2673 static
binary16Ix16(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2674 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2675 {
2676 IRAtom* at;
2677 at = mkUifUV256(mce, vatom1, vatom2);
2678 at = mkPCast16x16(mce, at);
2679 return at;
2680 }
2681
2682 static
binary32Ix8(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2683 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2684 {
2685 IRAtom* at;
2686 at = mkUifUV256(mce, vatom1, vatom2);
2687 at = mkPCast32x8(mce, at);
2688 return at;
2689 }
2690
2691 static
binary64Ix4(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2692 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2693 {
2694 IRAtom* at;
2695 at = mkUifUV256(mce, vatom1, vatom2);
2696 at = mkPCast64x4(mce, at);
2697 return at;
2698 }
2699
2700 /* --- V128-bit versions --- */
2701
2702 static
binary8Ix16(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2703 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2704 {
2705 IRAtom* at;
2706 at = mkUifUV128(mce, vatom1, vatom2);
2707 at = mkPCast8x16(mce, at);
2708 return at;
2709 }
2710
2711 static
binary16Ix8(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2712 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2713 {
2714 IRAtom* at;
2715 at = mkUifUV128(mce, vatom1, vatom2);
2716 at = mkPCast16x8(mce, at);
2717 return at;
2718 }
2719
2720 static
binary32Ix4(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2721 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2722 {
2723 IRAtom* at;
2724 at = mkUifUV128(mce, vatom1, vatom2);
2725 at = mkPCast32x4(mce, at);
2726 return at;
2727 }
2728
2729 static
binary64Ix2(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2730 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2731 {
2732 IRAtom* at;
2733 at = mkUifUV128(mce, vatom1, vatom2);
2734 at = mkPCast64x2(mce, at);
2735 return at;
2736 }
2737
2738 /* --- 64-bit versions --- */
2739
2740 static
binary8Ix8(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2741 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2742 {
2743 IRAtom* at;
2744 at = mkUifU64(mce, vatom1, vatom2);
2745 at = mkPCast8x8(mce, at);
2746 return at;
2747 }
2748
2749 static
binary16Ix4(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2750 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2751 {
2752 IRAtom* at;
2753 at = mkUifU64(mce, vatom1, vatom2);
2754 at = mkPCast16x4(mce, at);
2755 return at;
2756 }
2757
2758 static
binary32Ix2(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2759 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2760 {
2761 IRAtom* at;
2762 at = mkUifU64(mce, vatom1, vatom2);
2763 at = mkPCast32x2(mce, at);
2764 return at;
2765 }
2766
2767 static
binary64Ix1(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2768 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2769 {
2770 IRAtom* at;
2771 at = mkUifU64(mce, vatom1, vatom2);
2772 at = mkPCastTo(mce, Ity_I64, at);
2773 return at;
2774 }
2775
2776 /* --- 32-bit versions --- */
2777
2778 static
binary8Ix4(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2779 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2780 {
2781 IRAtom* at;
2782 at = mkUifU32(mce, vatom1, vatom2);
2783 at = mkPCast8x4(mce, at);
2784 return at;
2785 }
2786
2787 static
binary16Ix2(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)2788 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2789 {
2790 IRAtom* at;
2791 at = mkUifU32(mce, vatom1, vatom2);
2792 at = mkPCast16x2(mce, at);
2793 return at;
2794 }
2795
2796
2797 /*------------------------------------------------------------*/
2798 /*--- Generate shadow values from all kinds of IRExprs. ---*/
2799 /*------------------------------------------------------------*/
2800
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Compute the shadow (V-bit) value for a quaternary IR operation.
      First generate shadow values for all four operands, then combine
      them according to the op. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   /* Sanity checks: each atomN must be an original-program atom, each
      vatomN a shadow atom, and each (atomN,vatomN) pair must be of the
      same kind (constant vs temporary). */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      /* Scalar fused multiply-add/sub: handled with the lazy scheme,
         i.e. pessimistically fold the definedness of all four operands
         into a shadow value of the result's shadow type. */
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF128:
      case Iop_MSubF128:
      case Iop_NegMAddF128:
      case Iop_NegMSubF128:
         /* I32(rm) x F128 x F128 x F128 -> F128 */
         return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering: pure rearrangement, so just apply the
         same op to the shadow values. */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2854
2855
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Compute the shadow (V-bit) value for a ternary IR operation.
      First generate shadow values for the three operands, then combine
      them according to the op. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   /* Sanity checks: each atomN must be an original-program atom, each
      vatomN a shadow atom, and each (atomN,vatomN) pair must be of the
      same kind (constant vs temporary). */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      /* Scalar FP/decimal ops with a rounding mode as the first arg:
         handled lazily -- fold the definedness of all three operands
         into a shadow value of the result's shadow type. */
      case Iop_AddF128:
      case Iop_SubF128:
      case Iop_MulF128:
      case Iop_DivF128:
      case Iop_AddD128:
      case Iop_SubD128:
      case Iop_MulD128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      /* For Slice/SetElem, the index/amount operand must be fully
         defined (we complain otherwise) and is then passed through
         unshadowed, since the op's data movement depends on it. */
      case Iop_SliceV128:
         /* (V128, V128, I8) -> V128 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Slice64:
         /* (I64, I64, I8) -> I64 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));

      /* Vector FP with rounding mode as the first arg */
      case Iop_Add64Fx2:
      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Div64Fx2:
         return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Div32Fx4:
         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add64Fx4:
      case Iop_Sub64Fx4:
      case Iop_Mul64Fx4:
      case Iop_Div64Fx4:
         return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx8:
      case Iop_Sub32Fx8:
      case Iop_Mul32Fx8:
      case Iop_Div32Fx8:
         return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2967
2968
2969 static
expr2vbits_Binop(MCEnv * mce,IROp op,IRAtom * atom1,IRAtom * atom2)2970 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2971 IROp op,
2972 IRAtom* atom1, IRAtom* atom2 )
2973 {
2974 IRType and_or_ty;
2975 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2976 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2977 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2978
2979 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2980 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2981
2982 tl_assert(isOriginalAtom(mce,atom1));
2983 tl_assert(isOriginalAtom(mce,atom2));
2984 tl_assert(isShadowAtom(mce,vatom1));
2985 tl_assert(isShadowAtom(mce,vatom2));
2986 tl_assert(sameKindedAtoms(atom1,vatom1));
2987 tl_assert(sameKindedAtoms(atom2,vatom2));
2988 switch (op) {
2989
2990 /* 32-bit SIMD */
2991
2992 case Iop_Add16x2:
2993 case Iop_HAdd16Ux2:
2994 case Iop_HAdd16Sx2:
2995 case Iop_Sub16x2:
2996 case Iop_HSub16Ux2:
2997 case Iop_HSub16Sx2:
2998 case Iop_QAdd16Sx2:
2999 case Iop_QSub16Sx2:
3000 case Iop_QSub16Ux2:
3001 case Iop_QAdd16Ux2:
3002 return binary16Ix2(mce, vatom1, vatom2);
3003
3004 case Iop_Add8x4:
3005 case Iop_HAdd8Ux4:
3006 case Iop_HAdd8Sx4:
3007 case Iop_Sub8x4:
3008 case Iop_HSub8Ux4:
3009 case Iop_HSub8Sx4:
3010 case Iop_QSub8Ux4:
3011 case Iop_QAdd8Ux4:
3012 case Iop_QSub8Sx4:
3013 case Iop_QAdd8Sx4:
3014 return binary8Ix4(mce, vatom1, vatom2);
3015
3016 /* 64-bit SIMD */
3017
3018 case Iop_ShrN8x8:
3019 case Iop_ShrN16x4:
3020 case Iop_ShrN32x2:
3021 case Iop_SarN8x8:
3022 case Iop_SarN16x4:
3023 case Iop_SarN32x2:
3024 case Iop_ShlN16x4:
3025 case Iop_ShlN32x2:
3026 case Iop_ShlN8x8:
3027 /* Same scheme as with all other shifts. */
3028 complainIfUndefined(mce, atom2, NULL);
3029 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3030
3031 case Iop_QNarrowBin32Sto16Sx4:
3032 case Iop_QNarrowBin16Sto8Sx8:
3033 case Iop_QNarrowBin16Sto8Ux8:
3034 return vectorNarrowBin64(mce, op, vatom1, vatom2);
3035
3036 case Iop_Min8Ux8:
3037 case Iop_Min8Sx8:
3038 case Iop_Max8Ux8:
3039 case Iop_Max8Sx8:
3040 case Iop_Avg8Ux8:
3041 case Iop_QSub8Sx8:
3042 case Iop_QSub8Ux8:
3043 case Iop_Sub8x8:
3044 case Iop_CmpGT8Sx8:
3045 case Iop_CmpGT8Ux8:
3046 case Iop_CmpEQ8x8:
3047 case Iop_QAdd8Sx8:
3048 case Iop_QAdd8Ux8:
3049 case Iop_QSal8x8:
3050 case Iop_QShl8x8:
3051 case Iop_Add8x8:
3052 case Iop_Mul8x8:
3053 case Iop_PolynomialMul8x8:
3054 return binary8Ix8(mce, vatom1, vatom2);
3055
3056 case Iop_Min16Sx4:
3057 case Iop_Min16Ux4:
3058 case Iop_Max16Sx4:
3059 case Iop_Max16Ux4:
3060 case Iop_Avg16Ux4:
3061 case Iop_QSub16Ux4:
3062 case Iop_QSub16Sx4:
3063 case Iop_Sub16x4:
3064 case Iop_Mul16x4:
3065 case Iop_MulHi16Sx4:
3066 case Iop_MulHi16Ux4:
3067 case Iop_CmpGT16Sx4:
3068 case Iop_CmpGT16Ux4:
3069 case Iop_CmpEQ16x4:
3070 case Iop_QAdd16Sx4:
3071 case Iop_QAdd16Ux4:
3072 case Iop_QSal16x4:
3073 case Iop_QShl16x4:
3074 case Iop_Add16x4:
3075 case Iop_QDMulHi16Sx4:
3076 case Iop_QRDMulHi16Sx4:
3077 return binary16Ix4(mce, vatom1, vatom2);
3078
3079 case Iop_Sub32x2:
3080 case Iop_Mul32x2:
3081 case Iop_Max32Sx2:
3082 case Iop_Max32Ux2:
3083 case Iop_Min32Sx2:
3084 case Iop_Min32Ux2:
3085 case Iop_CmpGT32Sx2:
3086 case Iop_CmpGT32Ux2:
3087 case Iop_CmpEQ32x2:
3088 case Iop_Add32x2:
3089 case Iop_QAdd32Ux2:
3090 case Iop_QAdd32Sx2:
3091 case Iop_QSub32Ux2:
3092 case Iop_QSub32Sx2:
3093 case Iop_QSal32x2:
3094 case Iop_QShl32x2:
3095 case Iop_QDMulHi32Sx2:
3096 case Iop_QRDMulHi32Sx2:
3097 return binary32Ix2(mce, vatom1, vatom2);
3098
3099 case Iop_QSub64Ux1:
3100 case Iop_QSub64Sx1:
3101 case Iop_QAdd64Ux1:
3102 case Iop_QAdd64Sx1:
3103 case Iop_QSal64x1:
3104 case Iop_QShl64x1:
3105 case Iop_Sal64x1:
3106 return binary64Ix1(mce, vatom1, vatom2);
3107
3108 case Iop_QShlNsatSU8x8:
3109 case Iop_QShlNsatUU8x8:
3110 case Iop_QShlNsatSS8x8:
3111 complainIfUndefined(mce, atom2, NULL);
3112 return mkPCast8x8(mce, vatom1);
3113
3114 case Iop_QShlNsatSU16x4:
3115 case Iop_QShlNsatUU16x4:
3116 case Iop_QShlNsatSS16x4:
3117 complainIfUndefined(mce, atom2, NULL);
3118 return mkPCast16x4(mce, vatom1);
3119
3120 case Iop_QShlNsatSU32x2:
3121 case Iop_QShlNsatUU32x2:
3122 case Iop_QShlNsatSS32x2:
3123 complainIfUndefined(mce, atom2, NULL);
3124 return mkPCast32x2(mce, vatom1);
3125
3126 case Iop_QShlNsatSU64x1:
3127 case Iop_QShlNsatUU64x1:
3128 case Iop_QShlNsatSS64x1:
3129 complainIfUndefined(mce, atom2, NULL);
3130 return mkPCast32x2(mce, vatom1);
3131
3132 case Iop_PwMax32Sx2:
3133 case Iop_PwMax32Ux2:
3134 case Iop_PwMin32Sx2:
3135 case Iop_PwMin32Ux2:
3136 case Iop_PwMax32Fx2:
3137 case Iop_PwMin32Fx2:
3138 return assignNew('V', mce, Ity_I64,
3139 binop(Iop_PwMax32Ux2,
3140 mkPCast32x2(mce, vatom1),
3141 mkPCast32x2(mce, vatom2)));
3142
3143 case Iop_PwMax16Sx4:
3144 case Iop_PwMax16Ux4:
3145 case Iop_PwMin16Sx4:
3146 case Iop_PwMin16Ux4:
3147 return assignNew('V', mce, Ity_I64,
3148 binop(Iop_PwMax16Ux4,
3149 mkPCast16x4(mce, vatom1),
3150 mkPCast16x4(mce, vatom2)));
3151
3152 case Iop_PwMax8Sx8:
3153 case Iop_PwMax8Ux8:
3154 case Iop_PwMin8Sx8:
3155 case Iop_PwMin8Ux8:
3156 return assignNew('V', mce, Ity_I64,
3157 binop(Iop_PwMax8Ux8,
3158 mkPCast8x8(mce, vatom1),
3159 mkPCast8x8(mce, vatom2)));
3160
3161 case Iop_PwAdd32x2:
3162 case Iop_PwAdd32Fx2:
3163 return mkPCast32x2(mce,
3164 assignNew('V', mce, Ity_I64,
3165 binop(Iop_PwAdd32x2,
3166 mkPCast32x2(mce, vatom1),
3167 mkPCast32x2(mce, vatom2))));
3168
3169 case Iop_PwAdd16x4:
3170 return mkPCast16x4(mce,
3171 assignNew('V', mce, Ity_I64,
3172 binop(op, mkPCast16x4(mce, vatom1),
3173 mkPCast16x4(mce, vatom2))));
3174
3175 case Iop_PwAdd8x8:
3176 return mkPCast8x8(mce,
3177 assignNew('V', mce, Ity_I64,
3178 binop(op, mkPCast8x8(mce, vatom1),
3179 mkPCast8x8(mce, vatom2))));
3180
3181 case Iop_Shl8x8:
3182 case Iop_Shr8x8:
3183 case Iop_Sar8x8:
3184 case Iop_Sal8x8:
3185 return mkUifU64(mce,
3186 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3187 mkPCast8x8(mce,vatom2)
3188 );
3189
3190 case Iop_Shl16x4:
3191 case Iop_Shr16x4:
3192 case Iop_Sar16x4:
3193 case Iop_Sal16x4:
3194 return mkUifU64(mce,
3195 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3196 mkPCast16x4(mce,vatom2)
3197 );
3198
3199 case Iop_Shl32x2:
3200 case Iop_Shr32x2:
3201 case Iop_Sar32x2:
3202 case Iop_Sal32x2:
3203 return mkUifU64(mce,
3204 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3205 mkPCast32x2(mce,vatom2)
3206 );
3207
3208 /* 64-bit data-steering */
3209 case Iop_InterleaveLO32x2:
3210 case Iop_InterleaveLO16x4:
3211 case Iop_InterleaveLO8x8:
3212 case Iop_InterleaveHI32x2:
3213 case Iop_InterleaveHI16x4:
3214 case Iop_InterleaveHI8x8:
3215 case Iop_CatOddLanes8x8:
3216 case Iop_CatEvenLanes8x8:
3217 case Iop_CatOddLanes16x4:
3218 case Iop_CatEvenLanes16x4:
3219 case Iop_InterleaveOddLanes8x8:
3220 case Iop_InterleaveEvenLanes8x8:
3221 case Iop_InterleaveOddLanes16x4:
3222 case Iop_InterleaveEvenLanes16x4:
3223 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3224
3225 case Iop_GetElem8x8:
3226 complainIfUndefined(mce, atom2, NULL);
3227 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3228 case Iop_GetElem16x4:
3229 complainIfUndefined(mce, atom2, NULL);
3230 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3231 case Iop_GetElem32x2:
3232 complainIfUndefined(mce, atom2, NULL);
3233 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3234
3235 /* Perm8x8: rearrange values in left arg using steering values
3236 from right arg. So rearrange the vbits in the same way but
3237 pessimise wrt steering values. */
3238 case Iop_Perm8x8:
3239 return mkUifU64(
3240 mce,
3241 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3242 mkPCast8x8(mce, vatom2)
3243 );
3244
3245 /* V128-bit SIMD */
3246
3247 case Iop_Sqrt32Fx4:
3248 return unary32Fx4_w_rm(mce, vatom1, vatom2);
3249 case Iop_Sqrt64Fx2:
3250 return unary64Fx2_w_rm(mce, vatom1, vatom2);
3251
3252 case Iop_ShrN8x16:
3253 case Iop_ShrN16x8:
3254 case Iop_ShrN32x4:
3255 case Iop_ShrN64x2:
3256 case Iop_SarN8x16:
3257 case Iop_SarN16x8:
3258 case Iop_SarN32x4:
3259 case Iop_SarN64x2:
3260 case Iop_ShlN8x16:
3261 case Iop_ShlN16x8:
3262 case Iop_ShlN32x4:
3263 case Iop_ShlN64x2:
3264 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3265 this is wrong now, scalar shifts are done properly lazily.
3266 Vector shifts should be fixed too. */
3267 complainIfUndefined(mce, atom2, NULL);
3268 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3269
3270 /* V x V shifts/rotates are done using the standard lazy scheme. */
3271 /* For the non-rounding variants of bi-di vector x vector
3272 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3273 But note that this is overly pessimistic, because in fact only
3274 the bottom 8 bits of each lane of the second argument are taken
3275 into account when shifting. So really we ought to ignore
3276 undefinedness in bits 8 and above of each lane in the
3277 second argument. */
3278 case Iop_Shl8x16:
3279 case Iop_Shr8x16:
3280 case Iop_Sar8x16:
3281 case Iop_Sal8x16:
3282 case Iop_Rol8x16:
3283 case Iop_Sh8Sx16:
3284 case Iop_Sh8Ux16:
3285 return mkUifUV128(mce,
3286 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3287 mkPCast8x16(mce,vatom2)
3288 );
3289
3290 case Iop_Shl16x8:
3291 case Iop_Shr16x8:
3292 case Iop_Sar16x8:
3293 case Iop_Sal16x8:
3294 case Iop_Rol16x8:
3295 case Iop_Sh16Sx8:
3296 case Iop_Sh16Ux8:
3297 return mkUifUV128(mce,
3298 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3299 mkPCast16x8(mce,vatom2)
3300 );
3301
3302 case Iop_Shl32x4:
3303 case Iop_Shr32x4:
3304 case Iop_Sar32x4:
3305 case Iop_Sal32x4:
3306 case Iop_Rol32x4:
3307 case Iop_Sh32Sx4:
3308 case Iop_Sh32Ux4:
3309 return mkUifUV128(mce,
3310 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3311 mkPCast32x4(mce,vatom2)
3312 );
3313
3314 case Iop_Shl64x2:
3315 case Iop_Shr64x2:
3316 case Iop_Sar64x2:
3317 case Iop_Sal64x2:
3318 case Iop_Rol64x2:
3319 case Iop_Sh64Sx2:
3320 case Iop_Sh64Ux2:
3321 return mkUifUV128(mce,
3322 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3323 mkPCast64x2(mce,vatom2)
3324 );
3325
3326 /* For the rounding variants of bi-di vector x vector shifts, the
3327 rounding adjustment can cause undefinedness to propagate through
3328 the entire lane, in the worst case. Too complex to handle
3329 properly .. just UifU the arguments and then PCast them.
3330 Suboptimal but safe. */
3331 case Iop_Rsh8Sx16:
3332 case Iop_Rsh8Ux16:
3333 return binary8Ix16(mce, vatom1, vatom2);
3334 case Iop_Rsh16Sx8:
3335 case Iop_Rsh16Ux8:
3336 return binary16Ix8(mce, vatom1, vatom2);
3337 case Iop_Rsh32Sx4:
3338 case Iop_Rsh32Ux4:
3339 return binary32Ix4(mce, vatom1, vatom2);
3340 case Iop_Rsh64Sx2:
3341 case Iop_Rsh64Ux2:
3342 return binary64Ix2(mce, vatom1, vatom2);
3343
3344 case Iop_F32ToFixed32Ux4_RZ:
3345 case Iop_F32ToFixed32Sx4_RZ:
3346 case Iop_Fixed32UToF32x4_RN:
3347 case Iop_Fixed32SToF32x4_RN:
3348 complainIfUndefined(mce, atom2, NULL);
3349 return mkPCast32x4(mce, vatom1);
3350
3351 case Iop_F32ToFixed32Ux2_RZ:
3352 case Iop_F32ToFixed32Sx2_RZ:
3353 case Iop_Fixed32UToF32x2_RN:
3354 case Iop_Fixed32SToF32x2_RN:
3355 complainIfUndefined(mce, atom2, NULL);
3356 return mkPCast32x2(mce, vatom1);
3357
3358 case Iop_QSub8Ux16:
3359 case Iop_QSub8Sx16:
3360 case Iop_Sub8x16:
3361 case Iop_Min8Ux16:
3362 case Iop_Min8Sx16:
3363 case Iop_Max8Ux16:
3364 case Iop_Max8Sx16:
3365 case Iop_CmpGT8Sx16:
3366 case Iop_CmpGT8Ux16:
3367 case Iop_CmpEQ8x16:
3368 case Iop_Avg8Ux16:
3369 case Iop_Avg8Sx16:
3370 case Iop_QAdd8Ux16:
3371 case Iop_QAdd8Sx16:
3372 case Iop_QAddExtUSsatSS8x16:
3373 case Iop_QAddExtSUsatUU8x16:
3374 case Iop_QSal8x16:
3375 case Iop_QShl8x16:
3376 case Iop_Add8x16:
3377 case Iop_Mul8x16:
3378 case Iop_PolynomialMul8x16:
3379 case Iop_PolynomialMulAdd8x16:
3380 return binary8Ix16(mce, vatom1, vatom2);
3381
3382 case Iop_QSub16Ux8:
3383 case Iop_QSub16Sx8:
3384 case Iop_Sub16x8:
3385 case Iop_Mul16x8:
3386 case Iop_MulHi16Sx8:
3387 case Iop_MulHi16Ux8:
3388 case Iop_Min16Sx8:
3389 case Iop_Min16Ux8:
3390 case Iop_Max16Sx8:
3391 case Iop_Max16Ux8:
3392 case Iop_CmpGT16Sx8:
3393 case Iop_CmpGT16Ux8:
3394 case Iop_CmpEQ16x8:
3395 case Iop_Avg16Ux8:
3396 case Iop_Avg16Sx8:
3397 case Iop_QAdd16Ux8:
3398 case Iop_QAdd16Sx8:
3399 case Iop_QAddExtUSsatSS16x8:
3400 case Iop_QAddExtSUsatUU16x8:
3401 case Iop_QSal16x8:
3402 case Iop_QShl16x8:
3403 case Iop_Add16x8:
3404 case Iop_QDMulHi16Sx8:
3405 case Iop_QRDMulHi16Sx8:
3406 case Iop_PolynomialMulAdd16x8:
3407 return binary16Ix8(mce, vatom1, vatom2);
3408
3409 case Iop_Sub32x4:
3410 case Iop_CmpGT32Sx4:
3411 case Iop_CmpGT32Ux4:
3412 case Iop_CmpEQ32x4:
3413 case Iop_QAdd32Sx4:
3414 case Iop_QAdd32Ux4:
3415 case Iop_QSub32Sx4:
3416 case Iop_QSub32Ux4:
3417 case Iop_QAddExtUSsatSS32x4:
3418 case Iop_QAddExtSUsatUU32x4:
3419 case Iop_QSal32x4:
3420 case Iop_QShl32x4:
3421 case Iop_Avg32Ux4:
3422 case Iop_Avg32Sx4:
3423 case Iop_Add32x4:
3424 case Iop_Max32Ux4:
3425 case Iop_Max32Sx4:
3426 case Iop_Min32Ux4:
3427 case Iop_Min32Sx4:
3428 case Iop_Mul32x4:
3429 case Iop_QDMulHi32Sx4:
3430 case Iop_QRDMulHi32Sx4:
3431 case Iop_PolynomialMulAdd32x4:
3432 return binary32Ix4(mce, vatom1, vatom2);
3433
3434 case Iop_Sub64x2:
3435 case Iop_Add64x2:
3436 case Iop_Max64Sx2:
3437 case Iop_Max64Ux2:
3438 case Iop_Min64Sx2:
3439 case Iop_Min64Ux2:
3440 case Iop_CmpEQ64x2:
3441 case Iop_CmpGT64Sx2:
3442 case Iop_CmpGT64Ux2:
3443 case Iop_QSal64x2:
3444 case Iop_QShl64x2:
3445 case Iop_QAdd64Ux2:
3446 case Iop_QAdd64Sx2:
3447 case Iop_QSub64Ux2:
3448 case Iop_QSub64Sx2:
3449 case Iop_QAddExtUSsatSS64x2:
3450 case Iop_QAddExtSUsatUU64x2:
3451 case Iop_PolynomialMulAdd64x2:
3452 case Iop_CipherV128:
3453 case Iop_CipherLV128:
3454 case Iop_NCipherV128:
3455 case Iop_NCipherLV128:
3456 case Iop_MulI128by10E:
3457 case Iop_MulI128by10ECarry:
3458 return binary64Ix2(mce, vatom1, vatom2);
3459
3460 case Iop_QNarrowBin64Sto32Sx4:
3461 case Iop_QNarrowBin64Uto32Ux4:
3462 case Iop_QNarrowBin32Sto16Sx8:
3463 case Iop_QNarrowBin32Uto16Ux8:
3464 case Iop_QNarrowBin32Sto16Ux8:
3465 case Iop_QNarrowBin16Sto8Sx16:
3466 case Iop_QNarrowBin16Uto8Ux16:
3467 case Iop_QNarrowBin16Sto8Ux16:
3468 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3469
3470 case Iop_Min64Fx2:
3471 case Iop_Max64Fx2:
3472 case Iop_CmpLT64Fx2:
3473 case Iop_CmpLE64Fx2:
3474 case Iop_CmpEQ64Fx2:
3475 case Iop_CmpUN64Fx2:
3476 case Iop_RecipStep64Fx2:
3477 case Iop_RSqrtStep64Fx2:
3478 return binary64Fx2(mce, vatom1, vatom2);
3479
3480 case Iop_Sub64F0x2:
3481 case Iop_Mul64F0x2:
3482 case Iop_Min64F0x2:
3483 case Iop_Max64F0x2:
3484 case Iop_Div64F0x2:
3485 case Iop_CmpLT64F0x2:
3486 case Iop_CmpLE64F0x2:
3487 case Iop_CmpEQ64F0x2:
3488 case Iop_CmpUN64F0x2:
3489 case Iop_Add64F0x2:
3490 return binary64F0x2(mce, vatom1, vatom2);
3491
3492 case Iop_Min32Fx4:
3493 case Iop_Max32Fx4:
3494 case Iop_CmpLT32Fx4:
3495 case Iop_CmpLE32Fx4:
3496 case Iop_CmpEQ32Fx4:
3497 case Iop_CmpUN32Fx4:
3498 case Iop_CmpGT32Fx4:
3499 case Iop_CmpGE32Fx4:
3500 case Iop_RecipStep32Fx4:
3501 case Iop_RSqrtStep32Fx4:
3502 return binary32Fx4(mce, vatom1, vatom2);
3503
3504 case Iop_Sub32Fx2:
3505 case Iop_Mul32Fx2:
3506 case Iop_Min32Fx2:
3507 case Iop_Max32Fx2:
3508 case Iop_CmpEQ32Fx2:
3509 case Iop_CmpGT32Fx2:
3510 case Iop_CmpGE32Fx2:
3511 case Iop_Add32Fx2:
3512 case Iop_RecipStep32Fx2:
3513 case Iop_RSqrtStep32Fx2:
3514 return binary32Fx2(mce, vatom1, vatom2);
3515
3516 case Iop_Sub32F0x4:
3517 case Iop_Mul32F0x4:
3518 case Iop_Min32F0x4:
3519 case Iop_Max32F0x4:
3520 case Iop_Div32F0x4:
3521 case Iop_CmpLT32F0x4:
3522 case Iop_CmpLE32F0x4:
3523 case Iop_CmpEQ32F0x4:
3524 case Iop_CmpUN32F0x4:
3525 case Iop_Add32F0x4:
3526 return binary32F0x4(mce, vatom1, vatom2);
3527
3528 case Iop_QShlNsatSU8x16:
3529 case Iop_QShlNsatUU8x16:
3530 case Iop_QShlNsatSS8x16:
3531 complainIfUndefined(mce, atom2, NULL);
3532 return mkPCast8x16(mce, vatom1);
3533
3534 case Iop_QShlNsatSU16x8:
3535 case Iop_QShlNsatUU16x8:
3536 case Iop_QShlNsatSS16x8:
3537 complainIfUndefined(mce, atom2, NULL);
3538 return mkPCast16x8(mce, vatom1);
3539
3540 case Iop_QShlNsatSU32x4:
3541 case Iop_QShlNsatUU32x4:
3542 case Iop_QShlNsatSS32x4:
3543 complainIfUndefined(mce, atom2, NULL);
3544 return mkPCast32x4(mce, vatom1);
3545
3546 case Iop_QShlNsatSU64x2:
3547 case Iop_QShlNsatUU64x2:
3548 case Iop_QShlNsatSS64x2:
3549 complainIfUndefined(mce, atom2, NULL);
3550 return mkPCast32x4(mce, vatom1);
3551
3552 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3553 To make this simpler, do the following:
3554 * complain if the shift amount (the I8) is undefined
3555 * pcast each lane at the wide width
3556 * truncate each lane to half width
3557 * pcast the resulting 64-bit value to a single bit and use
3558 that as the least significant bit of the upper half of the
3559 result. */
3560 case Iop_QandQShrNnarrow64Uto32Ux2:
3561 case Iop_QandQSarNnarrow64Sto32Sx2:
3562 case Iop_QandQSarNnarrow64Sto32Ux2:
3563 case Iop_QandQRShrNnarrow64Uto32Ux2:
3564 case Iop_QandQRSarNnarrow64Sto32Sx2:
3565 case Iop_QandQRSarNnarrow64Sto32Ux2:
3566 case Iop_QandQShrNnarrow32Uto16Ux4:
3567 case Iop_QandQSarNnarrow32Sto16Sx4:
3568 case Iop_QandQSarNnarrow32Sto16Ux4:
3569 case Iop_QandQRShrNnarrow32Uto16Ux4:
3570 case Iop_QandQRSarNnarrow32Sto16Sx4:
3571 case Iop_QandQRSarNnarrow32Sto16Ux4:
3572 case Iop_QandQShrNnarrow16Uto8Ux8:
3573 case Iop_QandQSarNnarrow16Sto8Sx8:
3574 case Iop_QandQSarNnarrow16Sto8Ux8:
3575 case Iop_QandQRShrNnarrow16Uto8Ux8:
3576 case Iop_QandQRSarNnarrow16Sto8Sx8:
3577 case Iop_QandQRSarNnarrow16Sto8Ux8:
3578 {
3579 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3580 IROp opNarrow = Iop_INVALID;
3581 switch (op) {
3582 case Iop_QandQShrNnarrow64Uto32Ux2:
3583 case Iop_QandQSarNnarrow64Sto32Sx2:
3584 case Iop_QandQSarNnarrow64Sto32Ux2:
3585 case Iop_QandQRShrNnarrow64Uto32Ux2:
3586 case Iop_QandQRSarNnarrow64Sto32Sx2:
3587 case Iop_QandQRSarNnarrow64Sto32Ux2:
3588 fnPessim = mkPCast64x2;
3589 opNarrow = Iop_NarrowUn64to32x2;
3590 break;
3591 case Iop_QandQShrNnarrow32Uto16Ux4:
3592 case Iop_QandQSarNnarrow32Sto16Sx4:
3593 case Iop_QandQSarNnarrow32Sto16Ux4:
3594 case Iop_QandQRShrNnarrow32Uto16Ux4:
3595 case Iop_QandQRSarNnarrow32Sto16Sx4:
3596 case Iop_QandQRSarNnarrow32Sto16Ux4:
3597 fnPessim = mkPCast32x4;
3598 opNarrow = Iop_NarrowUn32to16x4;
3599 break;
3600 case Iop_QandQShrNnarrow16Uto8Ux8:
3601 case Iop_QandQSarNnarrow16Sto8Sx8:
3602 case Iop_QandQSarNnarrow16Sto8Ux8:
3603 case Iop_QandQRShrNnarrow16Uto8Ux8:
3604 case Iop_QandQRSarNnarrow16Sto8Sx8:
3605 case Iop_QandQRSarNnarrow16Sto8Ux8:
3606 fnPessim = mkPCast16x8;
3607 opNarrow = Iop_NarrowUn16to8x8;
3608 break;
3609 default:
3610 tl_assert(0);
3611 }
3612 complainIfUndefined(mce, atom2, NULL);
3613 // Pessimised shift result
3614 IRAtom* shV
3615 = fnPessim(mce, vatom1);
3616 // Narrowed, pessimised shift result
3617 IRAtom* shVnarrowed
3618 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3619 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3620 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3621 // and assemble the result
3622 return assignNew('V', mce, Ity_V128,
3623 binop(Iop_64HLtoV128, qV, shVnarrowed));
3624 }
3625
3626 case Iop_Mull32Sx2:
3627 case Iop_Mull32Ux2:
3628 case Iop_QDMull32Sx2:
3629 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3630 mkUifU64(mce, vatom1, vatom2));
3631
3632 case Iop_Mull16Sx4:
3633 case Iop_Mull16Ux4:
3634 case Iop_QDMull16Sx4:
3635 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3636 mkUifU64(mce, vatom1, vatom2));
3637
3638 case Iop_Mull8Sx8:
3639 case Iop_Mull8Ux8:
3640 case Iop_PolynomialMull8x8:
3641 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3642 mkUifU64(mce, vatom1, vatom2));
3643
3644 case Iop_PwAdd32x4:
3645 return mkPCast32x4(mce,
3646 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3647 mkPCast32x4(mce, vatom2))));
3648
3649 case Iop_PwAdd16x8:
3650 return mkPCast16x8(mce,
3651 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3652 mkPCast16x8(mce, vatom2))));
3653
3654 case Iop_PwAdd8x16:
3655 return mkPCast8x16(mce,
3656 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3657 mkPCast8x16(mce, vatom2))));
3658
3659 /* V128-bit data-steering */
3660 case Iop_SetV128lo32:
3661 case Iop_SetV128lo64:
3662 case Iop_64HLtoV128:
3663 case Iop_InterleaveLO64x2:
3664 case Iop_InterleaveLO32x4:
3665 case Iop_InterleaveLO16x8:
3666 case Iop_InterleaveLO8x16:
3667 case Iop_InterleaveHI64x2:
3668 case Iop_InterleaveHI32x4:
3669 case Iop_InterleaveHI16x8:
3670 case Iop_InterleaveHI8x16:
3671 case Iop_CatOddLanes8x16:
3672 case Iop_CatOddLanes16x8:
3673 case Iop_CatOddLanes32x4:
3674 case Iop_CatEvenLanes8x16:
3675 case Iop_CatEvenLanes16x8:
3676 case Iop_CatEvenLanes32x4:
3677 case Iop_InterleaveOddLanes8x16:
3678 case Iop_InterleaveOddLanes16x8:
3679 case Iop_InterleaveOddLanes32x4:
3680 case Iop_InterleaveEvenLanes8x16:
3681 case Iop_InterleaveEvenLanes16x8:
3682 case Iop_InterleaveEvenLanes32x4:
3683 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3684
3685 case Iop_GetElem8x16:
3686 complainIfUndefined(mce, atom2, NULL);
3687 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3688 case Iop_GetElem16x8:
3689 complainIfUndefined(mce, atom2, NULL);
3690 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3691 case Iop_GetElem32x4:
3692 complainIfUndefined(mce, atom2, NULL);
3693 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3694 case Iop_GetElem64x2:
3695 complainIfUndefined(mce, atom2, NULL);
3696 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3697
3698 /* Perm8x16: rearrange values in left arg using steering values
3699 from right arg. So rearrange the vbits in the same way but
3700 pessimise wrt steering values. Perm32x4 ditto. */
3701 case Iop_Perm8x16:
3702 return mkUifUV128(
3703 mce,
3704 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3705 mkPCast8x16(mce, vatom2)
3706 );
3707 case Iop_Perm32x4:
3708 return mkUifUV128(
3709 mce,
3710 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3711 mkPCast32x4(mce, vatom2)
3712 );
3713
3714 /* These two take the lower half of each 16-bit lane, sign/zero
3715 extend it to 32, and multiply together, producing a 32x4
3716 result (and implicitly ignoring half the operand bits). So
3717 treat it as a bunch of independent 16x8 operations, but then
3718 do 32-bit shifts left-right to copy the lower half results
3719 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3720 into the upper half of each result lane. */
3721 case Iop_MullEven16Ux8:
3722 case Iop_MullEven16Sx8: {
3723 IRAtom* at;
3724 at = binary16Ix8(mce,vatom1,vatom2);
3725 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3726 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3727 return at;
3728 }
3729
3730 /* Same deal as Iop_MullEven16{S,U}x8 */
3731 case Iop_MullEven8Ux16:
3732 case Iop_MullEven8Sx16: {
3733 IRAtom* at;
3734 at = binary8Ix16(mce,vatom1,vatom2);
3735 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3736 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3737 return at;
3738 }
3739
3740 /* Same deal as Iop_MullEven16{S,U}x8 */
3741 case Iop_MullEven32Ux4:
3742 case Iop_MullEven32Sx4: {
3743 IRAtom* at;
3744 at = binary32Ix4(mce,vatom1,vatom2);
3745 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3746 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3747 return at;
3748 }
3749
      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
         32x4 -> 16x8 laneage, discarding the upper half of each lane.
         Simply apply the same op to the V bits, since this is really no
         more than a data steering operation. */
3754 case Iop_NarrowBin32to16x8:
3755 case Iop_NarrowBin16to8x16:
3756 case Iop_NarrowBin64to32x4:
3757 return assignNew('V', mce, Ity_V128,
3758 binop(op, vatom1, vatom2));
3759
3760 case Iop_ShrV128:
3761 case Iop_ShlV128:
3762 case Iop_I128StoBCD128:
3763 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3764 this is wrong now, scalar shifts are done properly lazily.
3765 Vector shifts should be fixed too. */
3766 complainIfUndefined(mce, atom2, NULL);
3767 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3768
3769 case Iop_BCDAdd:
3770 case Iop_BCDSub:
3771 return mkLazy2(mce, Ity_V128, vatom1, vatom2);
3772
3773 /* SHA Iops */
3774 case Iop_SHA256:
3775 case Iop_SHA512:
3776 complainIfUndefined(mce, atom2, NULL);
3777 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3778
3779 /* I128-bit data-steering */
3780 case Iop_64HLto128:
3781 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3782
3783 /* V256-bit SIMD */
3784
3785 case Iop_Max64Fx4:
3786 case Iop_Min64Fx4:
3787 return binary64Fx4(mce, vatom1, vatom2);
3788
3789 case Iop_Max32Fx8:
3790 case Iop_Min32Fx8:
3791 return binary32Fx8(mce, vatom1, vatom2);
3792
3793 /* V256-bit data-steering */
3794 case Iop_V128HLtoV256:
3795 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3796
3797 /* Scalar floating point */
3798
3799 case Iop_F32toI64S:
3800 case Iop_F32toI64U:
3801 /* I32(rm) x F32 -> I64 */
3802 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3803
3804 case Iop_I64StoF32:
3805 /* I32(rm) x I64 -> F32 */
3806 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3807
3808 case Iop_RoundF64toInt:
3809 case Iop_RoundF64toF32:
3810 case Iop_F64toI64S:
3811 case Iop_F64toI64U:
3812 case Iop_I64StoF64:
3813 case Iop_I64UtoF64:
3814 case Iop_SinF64:
3815 case Iop_CosF64:
3816 case Iop_TanF64:
3817 case Iop_2xm1F64:
3818 case Iop_SqrtF64:
3819 case Iop_RecpExpF64:
3820 /* I32(rm) x I64/F64 -> I64/F64 */
3821 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3822
3823 case Iop_ShlD64:
3824 case Iop_ShrD64:
3825 case Iop_RoundD64toInt:
3826 /* I32(rm) x D64 -> D64 */
3827 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3828
3829 case Iop_ShlD128:
3830 case Iop_ShrD128:
3831 case Iop_RoundD128toInt:
3832 /* I32(rm) x D128 -> D128 */
3833 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3834
3835 case Iop_RoundF128toInt:
3836 /* I32(rm) x F128 -> F128 */
3837 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3838
3839 case Iop_D64toI64S:
3840 case Iop_D64toI64U:
3841 case Iop_I64StoD64:
3842 case Iop_I64UtoD64:
3843 /* I32(rm) x I64/D64 -> D64/I64 */
3844 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3845
3846 case Iop_F32toD32:
3847 case Iop_F64toD32:
3848 case Iop_F128toD32:
3849 case Iop_D32toF32:
3850 case Iop_D64toF32:
3851 case Iop_D128toF32:
3852 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3853 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3854
3855 case Iop_F32toD64:
3856 case Iop_F64toD64:
3857 case Iop_F128toD64:
3858 case Iop_D32toF64:
3859 case Iop_D64toF64:
3860 case Iop_D128toF64:
3861 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
3862 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3863
3864 case Iop_F32toD128:
3865 case Iop_F64toD128:
3866 case Iop_F128toD128:
3867 case Iop_D32toF128:
3868 case Iop_D64toF128:
3869 case Iop_D128toF128:
3870 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
3871 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3872
3873 case Iop_RoundF32toInt:
3874 case Iop_SqrtF32:
3875 case Iop_RecpExpF32:
3876 /* I32(rm) x I32/F32 -> I32/F32 */
3877 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3878
3879 case Iop_SqrtF128:
3880 /* I32(rm) x F128 -> F128 */
3881 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3882
3883 case Iop_I32StoF32:
3884 case Iop_I32UtoF32:
3885 case Iop_F32toI32S:
3886 case Iop_F32toI32U:
3887 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3888 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3889
3890 case Iop_F64toF16:
3891 case Iop_F32toF16:
3892 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
3893 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3894
3895 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
3896 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
3897 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3898 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3899 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
3900 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3901
3902 case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
3903 case Iop_RndF128: /* IRRoundingMode(I32) x F128 -> F128 */
3904 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3905
3906 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
3907 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
3908 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
3909 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3910 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3911 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
3912 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3913
3914 case Iop_F64HLtoF128:
3915 case Iop_D64HLtoD128:
3916 return assignNew('V', mce, Ity_I128,
3917 binop(Iop_64HLto128, vatom1, vatom2));
3918
3919 case Iop_F64toI32U:
3920 case Iop_F64toI32S:
3921 case Iop_F64toF32:
3922 case Iop_I64UtoF32:
3923 case Iop_D64toI32U:
3924 case Iop_D64toI32S:
3925 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
3926 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3927
3928 case Iop_D64toD32:
3929 /* First arg is I32 (rounding mode), second is D64 (data). */
3930 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3931
3932 case Iop_F64toI16S:
3933 /* First arg is I32 (rounding mode), second is F64 (data). */
3934 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3935
3936 case Iop_InsertExpD64:
3937 /* I64 x I64 -> D64 */
3938 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3939
3940 case Iop_InsertExpD128:
3941 /* I64 x I128 -> D128 */
3942 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3943
3944 case Iop_CmpF32:
3945 case Iop_CmpF64:
3946 case Iop_CmpF128:
3947 case Iop_CmpD64:
3948 case Iop_CmpD128:
3949 case Iop_CmpExpD64:
3950 case Iop_CmpExpD128:
3951 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3952
3953 case Iop_MaxNumF32:
3954 case Iop_MinNumF32:
3955 /* F32 x F32 -> F32 */
3956 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3957
3958 case Iop_MaxNumF64:
3959 case Iop_MinNumF64:
3960 /* F64 x F64 -> F64 */
3961 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3962
3963 /* non-FP after here */
3964
3965 case Iop_DivModU64to32:
3966 case Iop_DivModS64to32:
3967 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3968
3969 case Iop_DivModU128to64:
3970 case Iop_DivModS128to64:
3971 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3972
3973 case Iop_8HLto16:
3974 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
3975 case Iop_16HLto32:
3976 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3977 case Iop_32HLto64:
3978 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3979
3980 case Iop_DivModS64to64:
3981 case Iop_MullS64:
3982 case Iop_MullU64: {
3983 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3984 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3985 return assignNew('V', mce, Ity_I128,
3986 binop(Iop_64HLto128, vHi64, vLo64));
3987 }
3988
3989 case Iop_MullS32:
3990 case Iop_MullU32: {
3991 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3992 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3993 return assignNew('V', mce, Ity_I64,
3994 binop(Iop_32HLto64, vHi32, vLo32));
3995 }
3996
3997 case Iop_MullS16:
3998 case Iop_MullU16: {
3999 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4000 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
4001 return assignNew('V', mce, Ity_I32,
4002 binop(Iop_16HLto32, vHi16, vLo16));
4003 }
4004
4005 case Iop_MullS8:
4006 case Iop_MullU8: {
4007 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4008 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
4009 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
4010 }
4011
4012 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
4013 case Iop_DivS32:
4014 case Iop_DivU32:
4015 case Iop_DivU32E:
4016 case Iop_DivS32E:
4017 case Iop_QAdd32S: /* could probably do better */
4018 case Iop_QSub32S: /* could probably do better */
4019 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4020
4021 case Iop_DivS64:
4022 case Iop_DivU64:
4023 case Iop_DivS64E:
4024 case Iop_DivU64E:
4025 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4026
4027 case Iop_Add32:
4028 if (mce->bogusLiterals || mce->useLLVMworkarounds)
4029 return expensiveAddSub(mce,True,Ity_I32,
4030 vatom1,vatom2, atom1,atom2);
4031 else
4032 goto cheap_AddSub32;
4033 case Iop_Sub32:
4034 if (mce->bogusLiterals)
4035 return expensiveAddSub(mce,False,Ity_I32,
4036 vatom1,vatom2, atom1,atom2);
4037 else
4038 goto cheap_AddSub32;
4039
4040 cheap_AddSub32:
4041 case Iop_Mul32:
4042 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4043
4044 case Iop_CmpORD32S:
4045 case Iop_CmpORD32U:
4046 case Iop_CmpORD64S:
4047 case Iop_CmpORD64U:
4048 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
4049
4050 case Iop_Add64:
4051 if (mce->bogusLiterals || mce->useLLVMworkarounds)
4052 return expensiveAddSub(mce,True,Ity_I64,
4053 vatom1,vatom2, atom1,atom2);
4054 else
4055 goto cheap_AddSub64;
4056 case Iop_Sub64:
4057 if (mce->bogusLiterals)
4058 return expensiveAddSub(mce,False,Ity_I64,
4059 vatom1,vatom2, atom1,atom2);
4060 else
4061 goto cheap_AddSub64;
4062
4063 cheap_AddSub64:
4064 case Iop_Mul64:
4065 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4066
4067 case Iop_Mul16:
4068 case Iop_Add16:
4069 case Iop_Sub16:
4070 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4071
4072 case Iop_Mul8:
4073 case Iop_Sub8:
4074 case Iop_Add8:
4075 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4076
4077 case Iop_CmpEQ64:
4078 case Iop_CmpNE64:
4079 if (mce->bogusLiterals)
4080 goto expensive_cmp64;
4081 else
4082 goto cheap_cmp64;
4083
4084 expensive_cmp64:
4085 case Iop_ExpCmpNE64:
4086 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4087
4088 cheap_cmp64:
4089 case Iop_CmpLE64S: case Iop_CmpLE64U:
4090 case Iop_CmpLT64U: case Iop_CmpLT64S:
4091 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4092
4093 case Iop_CmpEQ32:
4094 case Iop_CmpNE32:
4095 if (mce->bogusLiterals)
4096 goto expensive_cmp32;
4097 else
4098 goto cheap_cmp32;
4099
4100 expensive_cmp32:
4101 case Iop_ExpCmpNE32:
4102 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4103
4104 cheap_cmp32:
4105 case Iop_CmpLE32S: case Iop_CmpLE32U:
4106 case Iop_CmpLT32U: case Iop_CmpLT32S:
4107 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4108
4109 case Iop_CmpEQ16: case Iop_CmpNE16:
4110 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4111
4112 case Iop_ExpCmpNE16:
4113 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4114
4115 case Iop_CmpEQ8: case Iop_CmpNE8:
4116 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4117
4118 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4119 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4120 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4121 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4122 /* Just say these all produce a defined result, regardless
4123 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4124 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4125
4126 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4127 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4128
4129 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4130 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4131
4132 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4133 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4134
4135 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4136 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4137
4138 case Iop_AndV256:
4139 uifu = mkUifUV256; difd = mkDifDV256;
4140 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4141 case Iop_AndV128:
4142 uifu = mkUifUV128; difd = mkDifDV128;
4143 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4144 case Iop_And64:
4145 uifu = mkUifU64; difd = mkDifD64;
4146 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4147 case Iop_And32:
4148 uifu = mkUifU32; difd = mkDifD32;
4149 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4150 case Iop_And16:
4151 uifu = mkUifU16; difd = mkDifD16;
4152 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4153 case Iop_And8:
4154 uifu = mkUifU8; difd = mkDifD8;
4155 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4156
4157 case Iop_OrV256:
4158 uifu = mkUifUV256; difd = mkDifDV256;
4159 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4160 case Iop_OrV128:
4161 uifu = mkUifUV128; difd = mkDifDV128;
4162 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4163 case Iop_Or64:
4164 uifu = mkUifU64; difd = mkDifD64;
4165 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4166 case Iop_Or32:
4167 uifu = mkUifU32; difd = mkDifD32;
4168 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4169 case Iop_Or16:
4170 uifu = mkUifU16; difd = mkDifD16;
4171 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4172 case Iop_Or8:
4173 uifu = mkUifU8; difd = mkDifD8;
4174 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4175
4176 do_And_Or:
4177 return
4178 assignNew(
4179 'V', mce,
4180 and_or_ty,
4181 difd(mce, uifu(mce, vatom1, vatom2),
4182 difd(mce, improve(mce, atom1, vatom1),
4183 improve(mce, atom2, vatom2) ) ) );
4184
4185 case Iop_Xor8:
4186 return mkUifU8(mce, vatom1, vatom2);
4187 case Iop_Xor16:
4188 return mkUifU16(mce, vatom1, vatom2);
4189 case Iop_Xor32:
4190 return mkUifU32(mce, vatom1, vatom2);
4191 case Iop_Xor64:
4192 return mkUifU64(mce, vatom1, vatom2);
4193 case Iop_XorV128:
4194 return mkUifUV128(mce, vatom1, vatom2);
4195 case Iop_XorV256:
4196 return mkUifUV256(mce, vatom1, vatom2);
4197
4198 /* V256-bit SIMD */
4199
4200 case Iop_ShrN16x16:
4201 case Iop_ShrN32x8:
4202 case Iop_ShrN64x4:
4203 case Iop_SarN16x16:
4204 case Iop_SarN32x8:
4205 case Iop_ShlN16x16:
4206 case Iop_ShlN32x8:
4207 case Iop_ShlN64x4:
4208 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4209 this is wrong now, scalar shifts are done properly lazily.
4210 Vector shifts should be fixed too. */
4211 complainIfUndefined(mce, atom2, NULL);
4212 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4213
4214 case Iop_QSub8Ux32:
4215 case Iop_QSub8Sx32:
4216 case Iop_Sub8x32:
4217 case Iop_Min8Ux32:
4218 case Iop_Min8Sx32:
4219 case Iop_Max8Ux32:
4220 case Iop_Max8Sx32:
4221 case Iop_CmpGT8Sx32:
4222 case Iop_CmpEQ8x32:
4223 case Iop_Avg8Ux32:
4224 case Iop_QAdd8Ux32:
4225 case Iop_QAdd8Sx32:
4226 case Iop_Add8x32:
4227 return binary8Ix32(mce, vatom1, vatom2);
4228
4229 case Iop_QSub16Ux16:
4230 case Iop_QSub16Sx16:
4231 case Iop_Sub16x16:
4232 case Iop_Mul16x16:
4233 case Iop_MulHi16Sx16:
4234 case Iop_MulHi16Ux16:
4235 case Iop_Min16Sx16:
4236 case Iop_Min16Ux16:
4237 case Iop_Max16Sx16:
4238 case Iop_Max16Ux16:
4239 case Iop_CmpGT16Sx16:
4240 case Iop_CmpEQ16x16:
4241 case Iop_Avg16Ux16:
4242 case Iop_QAdd16Ux16:
4243 case Iop_QAdd16Sx16:
4244 case Iop_Add16x16:
4245 return binary16Ix16(mce, vatom1, vatom2);
4246
4247 case Iop_Sub32x8:
4248 case Iop_CmpGT32Sx8:
4249 case Iop_CmpEQ32x8:
4250 case Iop_Add32x8:
4251 case Iop_Max32Ux8:
4252 case Iop_Max32Sx8:
4253 case Iop_Min32Ux8:
4254 case Iop_Min32Sx8:
4255 case Iop_Mul32x8:
4256 return binary32Ix8(mce, vatom1, vatom2);
4257
4258 case Iop_Sub64x4:
4259 case Iop_Add64x4:
4260 case Iop_CmpEQ64x4:
4261 case Iop_CmpGT64Sx4:
4262 return binary64Ix4(mce, vatom1, vatom2);
4263
4264 /* Perm32x8: rearrange values in left arg using steering values
4265 from right arg. So rearrange the vbits in the same way but
4266 pessimise wrt steering values. */
4267 case Iop_Perm32x8:
4268 return mkUifUV256(
4269 mce,
4270 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4271 mkPCast32x8(mce, vatom2)
4272 );
4273
4274 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4275 Handle the shifted results in the same way that other
4276 binary Q ops are handled, eg QSub: UifU the two args,
4277 then pessimise -- which is binaryNIxM. But for the upper
4278 V128, we require to generate just 1 bit which is the
4279 pessimised shift result, with 127 defined zeroes above it.
4280
         Note that this is overly pessimistic in that in fact only the
         bottom 8 bits of each lane of the second arg determine the shift
         amount.  Really we ought to ignore any undefinedness in the
         rest of the lanes of the second arg. */
4285 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4286 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4287 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4288 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4289 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4290 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4291 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4292 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4293 {
4294 // The function to generate the pessimised shift result
4295 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4296 switch (op) {
4297 case Iop_QandSQsh64x2:
4298 case Iop_QandUQsh64x2:
4299 case Iop_QandSQRsh64x2:
4300 case Iop_QandUQRsh64x2:
4301 binaryNIxM = binary64Ix2;
4302 break;
4303 case Iop_QandSQsh32x4:
4304 case Iop_QandUQsh32x4:
4305 case Iop_QandSQRsh32x4:
4306 case Iop_QandUQRsh32x4:
4307 binaryNIxM = binary32Ix4;
4308 break;
4309 case Iop_QandSQsh16x8:
4310 case Iop_QandUQsh16x8:
4311 case Iop_QandSQRsh16x8:
4312 case Iop_QandUQRsh16x8:
4313 binaryNIxM = binary16Ix8;
4314 break;
4315 case Iop_QandSQsh8x16:
4316 case Iop_QandUQsh8x16:
4317 case Iop_QandSQRsh8x16:
4318 case Iop_QandUQRsh8x16:
4319 binaryNIxM = binary8Ix16;
4320 break;
4321 default:
4322 tl_assert(0);
4323 }
4324 tl_assert(binaryNIxM);
4325 // Pessimised shift result, shV[127:0]
4326 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4327 // Generates: Def--(127)--Def PCast-to-I1(shV)
4328 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4329 // and assemble the result
4330 return assignNew('V', mce, Ity_V256,
4331 binop(Iop_V128HLtoV256, qV, shV));
4332 }
4333
4334 default:
4335 ppIROp(op);
4336 VG_(tool_panic)("memcheck:expr2vbits_Binop");
4337 }
4338 }
4339
4340
4341 static
expr2vbits_Unop(MCEnv * mce,IROp op,IRAtom * atom)4342 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4343 {
4344 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4345 selection of shadow operation implicitly duplicates the logic in
4346 do_shadow_LoadG and should be kept in sync (in the very unlikely
4347 event that the interpretation of such widening ops changes in
4348 future). See comment in do_shadow_LoadG. */
4349 IRAtom* vatom = expr2vbits( mce, atom );
4350 tl_assert(isOriginalAtom(mce,atom));
4351 switch (op) {
4352
4353 case Iop_Abs64Fx2:
4354 case Iop_Neg64Fx2:
4355 case Iop_RSqrtEst64Fx2:
4356 case Iop_RecipEst64Fx2:
4357 return unary64Fx2(mce, vatom);
4358
4359 case Iop_Sqrt64F0x2:
4360 return unary64F0x2(mce, vatom);
4361
4362 case Iop_Sqrt32Fx8:
4363 case Iop_RSqrtEst32Fx8:
4364 case Iop_RecipEst32Fx8:
4365 return unary32Fx8(mce, vatom);
4366
4367 case Iop_Sqrt64Fx4:
4368 return unary64Fx4(mce, vatom);
4369
4370 case Iop_RecipEst32Fx4:
4371 case Iop_I32UtoFx4:
4372 case Iop_I32StoFx4:
4373 case Iop_QFtoI32Ux4_RZ:
4374 case Iop_QFtoI32Sx4_RZ:
4375 case Iop_RoundF32x4_RM:
4376 case Iop_RoundF32x4_RP:
4377 case Iop_RoundF32x4_RN:
4378 case Iop_RoundF32x4_RZ:
4379 case Iop_RecipEst32Ux4:
4380 case Iop_Abs32Fx4:
4381 case Iop_Neg32Fx4:
4382 case Iop_RSqrtEst32Fx4:
4383 return unary32Fx4(mce, vatom);
4384
4385 case Iop_I32UtoFx2:
4386 case Iop_I32StoFx2:
4387 case Iop_RecipEst32Fx2:
4388 case Iop_RecipEst32Ux2:
4389 case Iop_Abs32Fx2:
4390 case Iop_Neg32Fx2:
4391 case Iop_RSqrtEst32Fx2:
4392 return unary32Fx2(mce, vatom);
4393
4394 case Iop_Sqrt32F0x4:
4395 case Iop_RSqrtEst32F0x4:
4396 case Iop_RecipEst32F0x4:
4397 return unary32F0x4(mce, vatom);
4398
4399 case Iop_32UtoV128:
4400 case Iop_64UtoV128:
4401 case Iop_Dup8x16:
4402 case Iop_Dup16x8:
4403 case Iop_Dup32x4:
4404 case Iop_Reverse1sIn8_x16:
4405 case Iop_Reverse8sIn16_x8:
4406 case Iop_Reverse8sIn32_x4:
4407 case Iop_Reverse16sIn32_x4:
4408 case Iop_Reverse8sIn64_x2:
4409 case Iop_Reverse16sIn64_x2:
4410 case Iop_Reverse32sIn64_x2:
4411 case Iop_V256toV128_1: case Iop_V256toV128_0:
4412 case Iop_ZeroHI64ofV128:
4413 case Iop_ZeroHI96ofV128:
4414 case Iop_ZeroHI112ofV128:
4415 case Iop_ZeroHI120ofV128:
4416 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4417
4418 case Iop_F128HItoF64: /* F128 -> high half of F128 */
4419 case Iop_D128HItoD64: /* D128 -> high half of D128 */
4420 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4421 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
4422 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
4423 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4424
4425 case Iop_NegF128:
4426 case Iop_AbsF128:
4427 case Iop_RndF128:
4428 case Iop_TruncF128toI64S: /* F128 -> I64S */
4429 case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */
4430 case Iop_TruncF128toI64U: /* F128 -> I64U */
4431 case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */
4432 return mkPCastTo(mce, Ity_I128, vatom);
4433
4434 case Iop_BCD128toI128S:
4435 case Iop_MulI128by10:
4436 case Iop_MulI128by10Carry:
4437 case Iop_F16toF64x2:
4438 case Iop_F64toF16x2:
4439 return vatom;
4440
4441 case Iop_I32StoF128: /* signed I32 -> F128 */
4442 case Iop_I64StoF128: /* signed I64 -> F128 */
4443 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4444 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
4445 case Iop_F32toF128: /* F32 -> F128 */
4446 case Iop_F64toF128: /* F64 -> F128 */
4447 case Iop_I32StoD128: /* signed I64 -> D128 */
4448 case Iop_I64StoD128: /* signed I64 -> D128 */
4449 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4450 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
4451 return mkPCastTo(mce, Ity_I128, vatom);
4452
4453 case Iop_F16toF64:
4454 case Iop_F32toF64:
4455 case Iop_I32StoF64:
4456 case Iop_I32UtoF64:
4457 case Iop_NegF64:
4458 case Iop_AbsF64:
4459 case Iop_RSqrtEst5GoodF64:
4460 case Iop_RoundF64toF64_NEAREST:
4461 case Iop_RoundF64toF64_NegINF:
4462 case Iop_RoundF64toF64_PosINF:
4463 case Iop_RoundF64toF64_ZERO:
4464 case Iop_Clz64:
4465 case Iop_D32toD64:
4466 case Iop_I32StoD64:
4467 case Iop_I32UtoD64:
4468 case Iop_ExtractExpD64: /* D64 -> I64 */
4469 case Iop_ExtractExpD128: /* D128 -> I64 */
4470 case Iop_ExtractSigD64: /* D64 -> I64 */
4471 case Iop_ExtractSigD128: /* D128 -> I64 */
4472 case Iop_DPBtoBCD:
4473 case Iop_BCDtoDPB:
4474 return mkPCastTo(mce, Ity_I64, vatom);
4475
4476 case Iop_D64toD128:
4477 return mkPCastTo(mce, Ity_I128, vatom);
4478
4479 case Iop_Clz32:
4480 case Iop_TruncF64asF32:
4481 case Iop_NegF32:
4482 case Iop_AbsF32:
4483 case Iop_F16toF32:
4484 return mkPCastTo(mce, Ity_I32, vatom);
4485
4486 case Iop_Ctz32:
4487 case Iop_Ctz64:
4488 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4489
4490 case Iop_1Uto64:
4491 case Iop_1Sto64:
4492 case Iop_8Uto64:
4493 case Iop_8Sto64:
4494 case Iop_16Uto64:
4495 case Iop_16Sto64:
4496 case Iop_32Sto64:
4497 case Iop_32Uto64:
4498 case Iop_V128to64:
4499 case Iop_V128HIto64:
4500 case Iop_128HIto64:
4501 case Iop_128to64:
4502 case Iop_Dup8x8:
4503 case Iop_Dup16x4:
4504 case Iop_Dup32x2:
4505 case Iop_Reverse8sIn16_x4:
4506 case Iop_Reverse8sIn32_x2:
4507 case Iop_Reverse16sIn32_x2:
4508 case Iop_Reverse8sIn64_x1:
4509 case Iop_Reverse16sIn64_x1:
4510 case Iop_Reverse32sIn64_x1:
4511 case Iop_V256to64_0: case Iop_V256to64_1:
4512 case Iop_V256to64_2: case Iop_V256to64_3:
4513 return assignNew('V', mce, Ity_I64, unop(op, vatom));
4514
4515 case Iop_64to32:
4516 case Iop_64HIto32:
4517 case Iop_1Uto32:
4518 case Iop_1Sto32:
4519 case Iop_8Uto32:
4520 case Iop_16Uto32:
4521 case Iop_16Sto32:
4522 case Iop_8Sto32:
4523 case Iop_V128to32:
4524 return assignNew('V', mce, Ity_I32, unop(op, vatom));
4525
4526 case Iop_8Sto16:
4527 case Iop_8Uto16:
4528 case Iop_32to16:
4529 case Iop_32HIto16:
4530 case Iop_64to16:
4531 case Iop_GetMSBs8x16:
4532 return assignNew('V', mce, Ity_I16, unop(op, vatom));
4533
4534 case Iop_1Uto8:
4535 case Iop_1Sto8:
4536 case Iop_16to8:
4537 case Iop_16HIto8:
4538 case Iop_32to8:
4539 case Iop_64to8:
4540 case Iop_GetMSBs8x8:
4541 return assignNew('V', mce, Ity_I8, unop(op, vatom));
4542
4543 case Iop_32to1:
4544 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
4545
4546 case Iop_64to1:
4547 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
4548
4549 case Iop_ReinterpF64asI64:
4550 case Iop_ReinterpI64asF64:
4551 case Iop_ReinterpI32asF32:
4552 case Iop_ReinterpF32asI32:
4553 case Iop_ReinterpI64asD64:
4554 case Iop_ReinterpD64asI64:
4555 case Iop_NotV256:
4556 case Iop_NotV128:
4557 case Iop_Not64:
4558 case Iop_Not32:
4559 case Iop_Not16:
4560 case Iop_Not8:
4561 case Iop_Not1:
4562 return vatom;
4563
4564 case Iop_CmpNEZ8x8:
4565 case Iop_Cnt8x8:
4566 case Iop_Clz8x8:
4567 case Iop_Cls8x8:
4568 case Iop_Abs8x8:
4569 return mkPCast8x8(mce, vatom);
4570
4571 case Iop_CmpNEZ8x16:
4572 case Iop_Cnt8x16:
4573 case Iop_Clz8x16:
4574 case Iop_Cls8x16:
4575 case Iop_Abs8x16:
4576 case Iop_Ctz8x16:
4577 return mkPCast8x16(mce, vatom);
4578
4579 case Iop_CmpNEZ16x4:
4580 case Iop_Clz16x4:
4581 case Iop_Cls16x4:
4582 case Iop_Abs16x4:
4583 return mkPCast16x4(mce, vatom);
4584
4585 case Iop_CmpNEZ16x8:
4586 case Iop_Clz16x8:
4587 case Iop_Cls16x8:
4588 case Iop_Abs16x8:
4589 case Iop_Ctz16x8:
4590 return mkPCast16x8(mce, vatom);
4591
4592 case Iop_CmpNEZ32x2:
4593 case Iop_Clz32x2:
4594 case Iop_Cls32x2:
4595 case Iop_FtoI32Ux2_RZ:
4596 case Iop_FtoI32Sx2_RZ:
4597 case Iop_Abs32x2:
4598 return mkPCast32x2(mce, vatom);
4599
4600 case Iop_CmpNEZ32x4:
4601 case Iop_Clz32x4:
4602 case Iop_Cls32x4:
4603 case Iop_FtoI32Ux4_RZ:
4604 case Iop_FtoI32Sx4_RZ:
4605 case Iop_Abs32x4:
4606 case Iop_RSqrtEst32Ux4:
4607 case Iop_Ctz32x4:
4608 return mkPCast32x4(mce, vatom);
4609
4610 case Iop_CmpwNEZ32:
4611 return mkPCastTo(mce, Ity_I32, vatom);
4612
4613 case Iop_CmpwNEZ64:
4614 return mkPCastTo(mce, Ity_I64, vatom);
4615
4616 case Iop_CmpNEZ64x2:
4617 case Iop_CipherSV128:
4618 case Iop_Clz64x2:
4619 case Iop_Abs64x2:
4620 case Iop_Ctz64x2:
4621 return mkPCast64x2(mce, vatom);
4622
4623 case Iop_PwBitMtxXpose64x2:
4624 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4625
4626 case Iop_NarrowUn16to8x8:
4627 case Iop_NarrowUn32to16x4:
4628 case Iop_NarrowUn64to32x2:
4629 case Iop_QNarrowUn16Sto8Sx8:
4630 case Iop_QNarrowUn16Sto8Ux8:
4631 case Iop_QNarrowUn16Uto8Ux8:
4632 case Iop_QNarrowUn32Sto16Sx4:
4633 case Iop_QNarrowUn32Sto16Ux4:
4634 case Iop_QNarrowUn32Uto16Ux4:
4635 case Iop_QNarrowUn64Sto32Sx2:
4636 case Iop_QNarrowUn64Sto32Ux2:
4637 case Iop_QNarrowUn64Uto32Ux2:
4638 case Iop_F32toF16x4:
4639 return vectorNarrowUnV128(mce, op, vatom);
4640
4641 case Iop_Widen8Sto16x8:
4642 case Iop_Widen8Uto16x8:
4643 case Iop_Widen16Sto32x4:
4644 case Iop_Widen16Uto32x4:
4645 case Iop_Widen32Sto64x2:
4646 case Iop_Widen32Uto64x2:
4647 case Iop_F16toF32x4:
4648 return vectorWidenI64(mce, op, vatom);
4649
4650 case Iop_PwAddL32Ux2:
4651 case Iop_PwAddL32Sx2:
4652 return mkPCastTo(mce, Ity_I64,
4653 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4654
4655 case Iop_PwAddL16Ux4:
4656 case Iop_PwAddL16Sx4:
4657 return mkPCast32x2(mce,
4658 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4659
4660 case Iop_PwAddL8Ux8:
4661 case Iop_PwAddL8Sx8:
4662 return mkPCast16x4(mce,
4663 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4664
4665 case Iop_PwAddL32Ux4:
4666 case Iop_PwAddL32Sx4:
4667 return mkPCast64x2(mce,
4668 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4669
4670 case Iop_PwAddL16Ux8:
4671 case Iop_PwAddL16Sx8:
4672 return mkPCast32x4(mce,
4673 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4674
4675 case Iop_PwAddL8Ux16:
4676 case Iop_PwAddL8Sx16:
4677 return mkPCast16x8(mce,
4678 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4679
4680 case Iop_I64UtoF32:
4681 default:
4682 ppIROp(op);
4683 VG_(tool_panic)("memcheck:expr2vbits_Unop");
4684 }
4685 }
4686
4687
/* Worker function -- do not call directly.  See comments on
   expr2vbits_Load for the meaning of |guard|.

   Generates IR to (1) perform a definedness test of |addr|, (2)
   perform a validity test of |addr|, and (3) return the Vbits for the
   location indicated by |addr|.  All of this only happens when
   |guard| is NULL or |guard| evaluates to True at run time.

   If |guard| evaluates to False at run time, the returned value is
   the IR-mandated 0x55..55 value, and no checks nor shadow loads are
   performed.

   The definedness of |guard| itself is not checked.  That is assumed
   to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   void*        helper = NULL;
   const HChar* hname = NULL;
   Bool         ret_via_outparam = False;

   /* Select the LOADV helper matching the (endianness, width) pair.
      The V128/V256 helpers cannot return their result in an integer
      register, so they write it via a vector-return out-parameter
      instead (see IRExpr_VECRET below). */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256le);
                        hname = "MC_(helperc_LOADV256le)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128le);
                        hname = "MC_(helperc_LOADV128le)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
                        hname = "MC_(helperc_LOADV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
                        hname = "MC_(helperc_LOADV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
                        hname = "MC_(helperc_LOADV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256be);
                        hname = "MC_(helperc_LOADV256be)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128be);
                        hname = "MC_(helperc_LOADV128be)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
                        hname = "MC_(helperc_LOADV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
                        hname = "MC_(helperc_LOADV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
                        hname = "MC_(helperc_LOADV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   tl_assert(helper);
   tl_assert(hname);

   /* Generate the actual address into addrAct.  |bias| is a
      compile-time-constant offset from |addr|; fold it in here so the
      helper only ever sees a single address argument. */
   IRAtom* addrAct;
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);

   /* Here's the call. */
   IRDirty* di;
   if (ret_via_outparam) {
      di = unsafeIRDirty_1_N( datavbits,
                              2/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
   } else {
      di = unsafeIRDirty_1_N( datavbits,
                              1/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_1( addrAct ) );
   }

   setHelperAnns( mce, di );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be all-ones
         (all-undefined), so it'd be obvious if it got used
         inadvertently.  We can get by with the IR-mandated default
         value (0b01 repeating, 0x55 etc) as that'll still look pretty
         undefined if it ever leaks out. */
   }
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
4822
4823
4824 /* Generate IR to do a shadow load. The helper is expected to check
4825 the validity of the address and return the V bits for that address.
4826 This can optionally be controlled by a guard, which is assumed to
4827 be True if NULL. In the case where the guard is False at runtime,
4828 the helper will return the didn't-do-the-call value of 0x55..55.
4829 Since that means "completely undefined result", the caller of
4830 this function will need to fix up the result somehow in that
4831 case.
4832
4833 Caller of this function is also expected to have checked the
4834 definedness of |guard| before this point.
4835 */
4836 static
expr2vbits_Load(MCEnv * mce,IREndness end,IRType ty,IRAtom * addr,UInt bias,IRAtom * guard)4837 IRAtom* expr2vbits_Load ( MCEnv* mce,
4838 IREndness end, IRType ty,
4839 IRAtom* addr, UInt bias,
4840 IRAtom* guard )
4841 {
4842 tl_assert(end == Iend_LE || end == Iend_BE);
4843 switch (shadowTypeV(ty)) {
4844 case Ity_I8:
4845 case Ity_I16:
4846 case Ity_I32:
4847 case Ity_I64:
4848 case Ity_V128:
4849 case Ity_V256:
4850 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
4851 default:
4852 VG_(tool_panic)("expr2vbits_Load");
4853 }
4854 }
4855
4856
/* The most general handler for guarded loads.  Assumes the
   definedness of GUARD has already been checked by the caller.  A
   GUARD of NULL is assumed to mean "always True".  Generates code to
   check the definedness and validity of ADDR.

   Generate IR to do a shadow load from ADDR and return the V bits.
   The loaded type is TY.  The loaded data is then (shadow) widened by
   using VWIDEN, which can be Iop_INVALID to denote a no-op.  If GUARD
   evaluates to False at run time then the returned Vbits are simply
   VALT instead.  Note therefore that the argument type of VWIDEN must
   be TY and the result type of VWIDEN must equal the type of VALT.
*/
static
IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
                                          IREndness end, IRType ty,
                                          IRAtom* addr, UInt bias,
                                          IRAtom* guard,
                                          IROp vwiden, IRAtom* valt )
{
   /* Sanity check the conversion operation, and also set TYWIDE.
      Only no-op and the 8/16 -> 32 bit integer widenings are
      supported. */
   IRType tyWide = Ity_INVALID;
   switch (vwiden) {
      case Iop_INVALID:
         tyWide = ty;
         break;
      case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
         tyWide = Ity_I32;
         break;
      default:
         VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
   }

   /* If the guard evaluates to True, this will hold the loaded V bits
      at TY.  If the guard evaluates to False, this will be the
      IR-mandated didn't-happen value, 0x55..55 (see
      expr2vbits_Load_WRK) -- still mostly-undefined-looking, and in
      any case we will replace it using an ITE below. */
   IRAtom* iftrue1
      = assignNew('V', mce, ty,
                  expr2vbits_Load(mce, end, ty, addr, bias, guard));
   /* Now (shadow-) widen the loaded V bits to the desired width.  In
      the guard-is-False case, the allowable widening operators will
      in the worst case (unsigned widening) at least leave the
      pre-widened part as being marked all-undefined, and in the best
      case (signed widening) mark the whole widened result as
      undefined.  Anyway, it doesn't matter really, since in this case
      we will replace said value with the default value |valt| using an
      ITE. */
   IRAtom* iftrue2
      = vwiden == Iop_INVALID
           ? iftrue1
           : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
   /* These are the V bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = valt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
}
4919
4920
4921 /* A simpler handler for guarded loads, in which there is no
4922 conversion operation, and the default V bit return (when the guard
4923 evaluates to False at runtime) is "all defined". If there is no
4924 guard expression or the guard is always TRUE this function behaves
4925 like expr2vbits_Load. It is assumed that definedness of GUARD has
4926 already been checked at the call site. */
4927 static
expr2vbits_Load_guarded_Simple(MCEnv * mce,IREndness end,IRType ty,IRAtom * addr,UInt bias,IRAtom * guard)4928 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4929 IREndness end, IRType ty,
4930 IRAtom* addr, UInt bias,
4931 IRAtom *guard )
4932 {
4933 return expr2vbits_Load_guarded_General(
4934 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4935 );
4936 }
4937
4938
4939 static
expr2vbits_ITE(MCEnv * mce,IRAtom * cond,IRAtom * iftrue,IRAtom * iffalse)4940 IRAtom* expr2vbits_ITE ( MCEnv* mce,
4941 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
4942 {
4943 IRAtom *vbitsC, *vbits0, *vbits1;
4944 IRType ty;
4945 /* Given ITE(cond, iftrue, iffalse), generate
4946 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
4947 That is, steer the V bits like the originals, but trash the
4948 result if the steering value is undefined. This gives
4949 lazy propagation. */
4950 tl_assert(isOriginalAtom(mce, cond));
4951 tl_assert(isOriginalAtom(mce, iftrue));
4952 tl_assert(isOriginalAtom(mce, iffalse));
4953
4954 vbitsC = expr2vbits(mce, cond);
4955 vbits1 = expr2vbits(mce, iftrue);
4956 vbits0 = expr2vbits(mce, iffalse);
4957 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4958
4959 return
4960 mkUifU(mce, ty, assignNew('V', mce, ty,
4961 IRExpr_ITE(cond, vbits1, vbits0)),
4962 mkPCastTo(mce, ty, vbitsC) );
4963 }
4964
4965 /* --------- This is the main expression-handling function. --------- */
4966
4967 static
expr2vbits(MCEnv * mce,IRExpr * e)4968 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4969 {
4970 switch (e->tag) {
4971
4972 case Iex_Get:
4973 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4974
4975 case Iex_GetI:
4976 return shadow_GETI( mce, e->Iex.GetI.descr,
4977 e->Iex.GetI.ix, e->Iex.GetI.bias );
4978
4979 case Iex_RdTmp:
4980 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4981
4982 case Iex_Const:
4983 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4984
4985 case Iex_Qop:
4986 return expr2vbits_Qop(
4987 mce,
4988 e->Iex.Qop.details->op,
4989 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4990 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4991 );
4992
4993 case Iex_Triop:
4994 return expr2vbits_Triop(
4995 mce,
4996 e->Iex.Triop.details->op,
4997 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4998 e->Iex.Triop.details->arg3
4999 );
5000
5001 case Iex_Binop:
5002 return expr2vbits_Binop(
5003 mce,
5004 e->Iex.Binop.op,
5005 e->Iex.Binop.arg1, e->Iex.Binop.arg2
5006 );
5007
5008 case Iex_Unop:
5009 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
5010
5011 case Iex_Load:
5012 return expr2vbits_Load( mce, e->Iex.Load.end,
5013 e->Iex.Load.ty,
5014 e->Iex.Load.addr, 0/*addr bias*/,
5015 NULL/* guard == "always True"*/ );
5016
5017 case Iex_CCall:
5018 return mkLazyN( mce, e->Iex.CCall.args,
5019 e->Iex.CCall.retty,
5020 e->Iex.CCall.cee );
5021
5022 case Iex_ITE:
5023 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
5024 e->Iex.ITE.iffalse);
5025
5026 default:
5027 VG_(printf)("\n");
5028 ppIRExpr(e);
5029 VG_(printf)("\n");
5030 VG_(tool_panic)("memcheck: expr2vbits");
5031 }
5032 }
5033
5034 /*------------------------------------------------------------*/
5035 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5036 /*------------------------------------------------------------*/
5037
5038 /* Widen a value to the host word size. */
5039
5040 static
zwidenToHostWord(MCEnv * mce,IRAtom * vatom)5041 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
5042 {
5043 IRType ty, tyH;
5044
5045 /* vatom is vbits-value and as such can only have a shadow type. */
5046 tl_assert(isShadowAtom(mce,vatom));
5047
5048 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
5049 tyH = mce->hWordTy;
5050
5051 if (tyH == Ity_I32) {
5052 switch (ty) {
5053 case Ity_I32:
5054 return vatom;
5055 case Ity_I16:
5056 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
5057 case Ity_I8:
5058 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
5059 default:
5060 goto unhandled;
5061 }
5062 } else
5063 if (tyH == Ity_I64) {
5064 switch (ty) {
5065 case Ity_I32:
5066 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
5067 case Ity_I16:
5068 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5069 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
5070 case Ity_I8:
5071 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5072 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
5073 default:
5074 goto unhandled;
5075 }
5076 } else {
5077 goto unhandled;
5078 }
5079 unhandled:
5080 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5081 VG_(tool_panic)("zwidenToHostWord");
5082 }
5083
5084
/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary.
*/
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of |data| (an original atom) and |vdata| (its
      shadow) must be supplied.  Given the original, compute its
      shadow here. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         /* NOTE(review): the vector-const constructors take narrow
            masks (IRConst_V128 a 16-bit one, IRConst_V256 a 32-bit
            one), presumably one mask bit per byte of vector -- hence
            the 16-/32-bit "defined" constants here; confirm against
            libvex_ir.h. */
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane.  Each lane becomes a
         separate 64-bit STOREV call, at the appropriate offset for
         the chosen endianness. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      /* All four lane stores are gated on the same guard. */
      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-extended to the host word so it can
            be passed in a register. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
5345
5346
5347 /* Do lazy pessimistic propagation through a dirty helper call, by
5348 looking at the annotations on it. This is the most complex part of
5349 Memcheck. */
5350
szToITy(Int n)5351 static IRType szToITy ( Int n )
5352 {
5353 switch (n) {
5354 case 1: return Ity_I8;
5355 case 2: return Ity_I16;
5356 case 4: return Ity_I32;
5357 case 8: return Ity_I64;
5358 default: VG_(tool_panic)("szToITy(memcheck)");
5359 }
5360 }
5361
/* Instrument an IRDirty helper call.  Strategy: pessimistically fold
   the definedness of every input (the call's arguments, guest state
   read, and memory read) into a single 32-bit V-value |curr|, then
   write a suitably-cast copy of |curr| to every output (the result
   temporary, guest state written, and memory written).  The guard is
   checked for definedness up front; state/memory effects are gated on
   it. */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression.
      Args flagged in the callee's mcx_mask, and the synthetic
      VECRET/GSPTR pseudo-args, are excluded from the definedness
      summary. */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                                NULL, /* original atom */
                                mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
5580
5581
5582 /* We have an ABI hint telling us that [base .. base+len-1] is to
5583 become undefined ("writable"). Generate code to call a helper to
5584 notify the A/V bit machinery of this fact.
5585
5586 We call
5587 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5588 Addr nia );
5589 */
5590 static
do_AbiHint(MCEnv * mce,IRExpr * base,Int len,IRExpr * nia)5591 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
5592 {
5593 IRDirty* di;
5594
5595 if (MC_(clo_mc_level) == 3) {
5596 di = unsafeIRDirty_0_N(
5597 3/*regparms*/,
5598 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
5599 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
5600 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
5601 );
5602 } else {
5603 /* We ignore the supplied nia, since it is irrelevant. */
5604 tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
5605 /* Special-case the len==128 case, since that is for amd64-ELF,
5606 which is a very common target. */
5607 if (len == 128) {
5608 di = unsafeIRDirty_0_N(
5609 1/*regparms*/,
5610 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
5611 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
5612 mkIRExprVec_1( base )
5613 );
5614 } else {
5615 di = unsafeIRDirty_0_N(
5616 2/*regparms*/,
5617 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
5618 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
5619 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
5620 );
5621 }
5622 }
5623
5624 stmt( 'V', mce, IRStmt_Dirty(di) );
5625 }
5626
5627
5628 /* ------ Dealing with IRCAS (big and complex) ------ */
5629
5630 /* FWDS */
5631 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5632 IRAtom* baseaddr, Int offset );
5633 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5634 static void gen_store_b ( MCEnv* mce, Int szB,
5635 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5636 IRAtom* guard );
5637
5638 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5639 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5640
5641
5642 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5643 IRExpr.Consts, else this asserts. If they are both Consts, it
5644 doesn't do anything. So that just leaves the RdTmp case.
5645
5646 In which case: this assigns the shadow value SHADOW to the IR
5647 shadow temporary associated with ORIG. That is, ORIG, being an
5648 original temporary, will have a shadow temporary associated with
5649 it. However, in the case envisaged here, there will so far have
5650 been no IR emitted to actually write a shadow value into that
5651 temporary. What this routine does is to (emit IR to) copy the
5652 value in SHADOW into said temporary, so that after this call,
5653 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5654 value in SHADOW.
5655
5656 Point is to allow callers to compute "by hand" a shadow value for
5657 ORIG, and force it to be associated with ORIG.
5658
5659 How do we know that that shadow associated with ORIG has not so far
5660 been assigned to? Well, we don't per se know that, but supposing
5661 it had. Then this routine would create a second assignment to it,
5662 and later the IR sanity checker would barf. But that never
5663 happens. QED.
5664 */
static void bind_shadow_tmp_to_orig ( UChar how,
                                      MCEnv* mce,
                                      IRAtom* orig, IRAtom* shadow )
{
   IRTemp stmp;
   tl_assert(isOriginalAtom(mce, orig));
   tl_assert(isShadowAtom(mce, shadow));
   if (orig->tag == Iex_Const) {
      /* Both are constants; there's nothing to emit. */
      tl_assert(shadow->tag == Iex_Const);
      return;
   }
   if (orig->tag == Iex_RdTmp) {
      /* Emit IR to copy SHADOW into the shadow temporary associated
         with ORIG, so that later RdTmps of that shadow temp pick up
         the value in SHADOW.  'how' selects the V-bit or the
         origin (B) shadow. */
      tl_assert(shadow->tag == Iex_RdTmp);
      if (how == 'V') {
         stmp = findShadowTmpV(mce, orig->Iex.RdTmp.tmp);
      } else {
         tl_assert(how == 'B');
         stmp = findShadowTmpB(mce, orig->Iex.RdTmp.tmp);
      }
      assign(how, mce, stmp, shadow);
      return;
   }
   /* Neither Const nor RdTmp: not an atom we can handle. */
   tl_assert(0);
}
5690
5691
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressability of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* Dispatch on single- vs double-element CAS: a double CAS has a
      valid .oldHi temporary, a single CAS does not. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
5824
5825
/* Instrument a single-element CAS, following steps 1-7 of the scheme
   described in do_shadow_CAS.  V-bit shadows are always handled;
   origin (B) shadows only when origin tracking is enabled
   (MC_(clo_mc_level) >= 3). */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make the just-loaded V shadow visible as the shadow of the CAS's
      .oldLo result temporary. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5914
5915
/* Instrument a double-element CAS, following steps 1-7 of the scheme
   described in do_shadow_CAS.  Same as the single case, but the Hi
   and Lo halves are each shadowed, and "expected == old" is computed
   over both halves via xor/or.  Endianness determines which half
   lives at the lower memory offset. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   /* Little-endian: Lo half at the lower address; big-endian: the
      reverse. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   /* Make the just-loaded V shadows visible as the shadows of the
      CAS's .oldHi/.oldLo result temporaries. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
6076
6077
6078 /* ------ Dealing with LL/SC (not difficult) ------ */
6079
/* Instrument a Load-Linked (stStoredata == NULL) or Store-Conditional
   (stStoredata != NULL) statement.  LL is shadowed as a normal load
   whose V bits are assigned to stResult's shadow; SC is shadowed as a
   normal store, with the success/failure result marked as always
   defined. */
static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
                 NULL/*always happens*/) );
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressability, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
6139
6140
6141 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6142
/* Instrument a guarded store (IRStoreG): the guard itself must be a
   defined value, and the data's shadow is stored under the same
   guard. */
static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   /* Complain if the guard is undefined. */
   complainIfUndefined(mce, sg->guard, NULL);
   /* Address definedness and validity checks are emitted by
      do_shadow_Store itself, gated on sg->guard evaluating to True
      at run time. */
   do_shadow_Store(mce, sg->end, sg->addr, 0/* addr bias */,
                   sg->data, NULL /* shadow data */, sg->guard);
}
6155
/* Instrument a guarded load (IRLoadG): check the guard's definedness,
   then compute V bits for the destination as either the
   loaded-and-widened shadow (guard True) or the shadow of the
   alternative value (guard False). */
static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IROp    widenOp  = Iop_INVALID;
   IRType  loadedTy = Ity_INVALID;
   IRAtom* vAlt;
   IRAtom* vFinal;

   complainIfUndefined(mce, lg->guard, NULL);
   /* expr2vbits_Load_guarded_General will generate code to check the
      definedness and validity of lg->addr, in the case where
      lg->guard evaluates to True at run-time. */

   /* Decode the LoadG's built-in conversion operation to obtain the
      type of the actual loaded data and the equivalent widening IROp.
      Implicitly this takes a widening op applied to original atoms
      and produces one that applies to V bits; since signed and
      unsigned widening are self-shadowing, that is a straight copy of
      the op (modulo swapping from the IRLoadGOp form to the IROp
      form).  Note this duplicates the widening-op logic in
      expr2vbits_Unop; see the comment at the start of that
      function. */
   switch (lg->cvt) {
      case ILGop_IdentV128: loadedTy = Ity_V128; break;
      case ILGop_Ident64:   loadedTy = Ity_I64;  break;
      case ILGop_Ident32:   loadedTy = Ity_I32;  break;
      case ILGop_16Uto32:   loadedTy = Ity_I16;  widenOp = Iop_16Uto32; break;
      case ILGop_16Sto32:   loadedTy = Ity_I16;  widenOp = Iop_16Sto32; break;
      case ILGop_8Uto32:    loadedTy = Ity_I8;   widenOp = Iop_8Uto32;  break;
      case ILGop_8Sto32:    loadedTy = Ity_I8;   widenOp = Iop_8Sto32;  break;
      default: VG_(tool_panic)("do_shadow_LoadG");
   }

   /* Shadow of the alternative value, used when the guard is False. */
   vAlt = expr2vbits( mce, lg->alt );
   /* Guarded load of the V bits, widened as per the conversion. */
   vFinal
      = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
                                        lg->addr, 0/*addr bias*/,
                                        lg->guard, widenOp, vAlt );
   /* And finally, bind the V bits to the destination temporary. */
   assign( 'V', mce, findShadowTmpV(mce, lg->dst), vFinal );
}
6194
6195
6196 /*------------------------------------------------------------*/
6197 /*--- Memcheck main ---*/
6198 /*------------------------------------------------------------*/
6199
6200 static void schemeS ( MCEnv* mce, IRStmt* st );
6201
isBogusAtom(IRAtom * at)6202 static Bool isBogusAtom ( IRAtom* at )
6203 {
6204 ULong n = 0;
6205 IRConst* con;
6206 tl_assert(isIRAtom(at));
6207 if (at->tag == Iex_RdTmp)
6208 return False;
6209 tl_assert(at->tag == Iex_Const);
6210 con = at->Iex.Const.con;
6211 switch (con->tag) {
6212 case Ico_U1: return False;
6213 case Ico_U8: n = (ULong)con->Ico.U8; break;
6214 case Ico_U16: n = (ULong)con->Ico.U16; break;
6215 case Ico_U32: n = (ULong)con->Ico.U32; break;
6216 case Ico_U64: n = (ULong)con->Ico.U64; break;
6217 case Ico_F32: return False;
6218 case Ico_F64: return False;
6219 case Ico_F32i: return False;
6220 case Ico_F64i: return False;
6221 case Ico_V128: return False;
6222 case Ico_V256: return False;
6223 default: ppIRExpr(at); tl_assert(0);
6224 }
6225 /* VG_(printf)("%llx\n", n); */
6226 return (/*32*/ n == 0xFEFEFEFFULL
6227 /*32*/ || n == 0x80808080ULL
6228 /*32*/ || n == 0x7F7F7F7FULL
6229 /*32*/ || n == 0x7EFEFEFFULL
6230 /*32*/ || n == 0x81010100ULL
6231 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
6232 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
6233 /*64*/ || n == 0x0000000000008080ULL
6234 /*64*/ || n == 0x8080808080808080ULL
6235 /*64*/ || n == 0x0101010101010101ULL
6236 );
6237 }
6238
checkForBogusLiterals(IRStmt * st)6239 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
6240 {
6241 Int i;
6242 IRExpr* e;
6243 IRDirty* d;
6244 IRCAS* cas;
6245 switch (st->tag) {
6246 case Ist_WrTmp:
6247 e = st->Ist.WrTmp.data;
6248 switch (e->tag) {
6249 case Iex_Get:
6250 case Iex_RdTmp:
6251 return False;
6252 case Iex_Const:
6253 return isBogusAtom(e);
6254 case Iex_Unop:
6255 return isBogusAtom(e->Iex.Unop.arg)
6256 || e->Iex.Unop.op == Iop_GetMSBs8x16;
6257 case Iex_GetI:
6258 return isBogusAtom(e->Iex.GetI.ix);
6259 case Iex_Binop:
6260 return isBogusAtom(e->Iex.Binop.arg1)
6261 || isBogusAtom(e->Iex.Binop.arg2);
6262 case Iex_Triop:
6263 return isBogusAtom(e->Iex.Triop.details->arg1)
6264 || isBogusAtom(e->Iex.Triop.details->arg2)
6265 || isBogusAtom(e->Iex.Triop.details->arg3);
6266 case Iex_Qop:
6267 return isBogusAtom(e->Iex.Qop.details->arg1)
6268 || isBogusAtom(e->Iex.Qop.details->arg2)
6269 || isBogusAtom(e->Iex.Qop.details->arg3)
6270 || isBogusAtom(e->Iex.Qop.details->arg4);
6271 case Iex_ITE:
6272 return isBogusAtom(e->Iex.ITE.cond)
6273 || isBogusAtom(e->Iex.ITE.iftrue)
6274 || isBogusAtom(e->Iex.ITE.iffalse);
6275 case Iex_Load:
6276 return isBogusAtom(e->Iex.Load.addr);
6277 case Iex_CCall:
6278 for (i = 0; e->Iex.CCall.args[i]; i++)
6279 if (isBogusAtom(e->Iex.CCall.args[i]))
6280 return True;
6281 return False;
6282 default:
6283 goto unhandled;
6284 }
6285 case Ist_Dirty:
6286 d = st->Ist.Dirty.details;
6287 for (i = 0; d->args[i]; i++) {
6288 IRAtom* atom = d->args[i];
6289 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) {
6290 if (isBogusAtom(atom))
6291 return True;
6292 }
6293 }
6294 if (isBogusAtom(d->guard))
6295 return True;
6296 if (d->mAddr && isBogusAtom(d->mAddr))
6297 return True;
6298 return False;
6299 case Ist_Put:
6300 return isBogusAtom(st->Ist.Put.data);
6301 case Ist_PutI:
6302 return isBogusAtom(st->Ist.PutI.details->ix)
6303 || isBogusAtom(st->Ist.PutI.details->data);
6304 case Ist_Store:
6305 return isBogusAtom(st->Ist.Store.addr)
6306 || isBogusAtom(st->Ist.Store.data);
6307 case Ist_StoreG: {
6308 IRStoreG* sg = st->Ist.StoreG.details;
6309 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
6310 || isBogusAtom(sg->guard);
6311 }
6312 case Ist_LoadG: {
6313 IRLoadG* lg = st->Ist.LoadG.details;
6314 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
6315 || isBogusAtom(lg->guard);
6316 }
6317 case Ist_Exit:
6318 return isBogusAtom(st->Ist.Exit.guard);
6319 case Ist_AbiHint:
6320 return isBogusAtom(st->Ist.AbiHint.base)
6321 || isBogusAtom(st->Ist.AbiHint.nia);
6322 case Ist_NoOp:
6323 case Ist_IMark:
6324 case Ist_MBE:
6325 return False;
6326 case Ist_CAS:
6327 cas = st->Ist.CAS.details;
6328 return isBogusAtom(cas->addr)
6329 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
6330 || isBogusAtom(cas->expdLo)
6331 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
6332 || isBogusAtom(cas->dataLo);
6333 case Ist_LLSC:
6334 return isBogusAtom(st->Ist.LLSC.addr)
6335 || (st->Ist.LLSC.storedata
6336 ? isBogusAtom(st->Ist.LLSC.storedata)
6337 : False);
6338 default:
6339 unhandled:
6340 ppIRStmt(st);
6341 VG_(tool_panic)("hasBogusLiterals");
6342 }
6343 }
6344
6345
MC_(instrument)6346 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
6347 IRSB* sb_in,
6348 const VexGuestLayout* layout,
6349 const VexGuestExtents* vge,
6350 const VexArchInfo* archinfo_host,
6351 IRType gWordTy, IRType hWordTy )
6352 {
6353 Bool verboze = 0||False;
6354 Int i, j, first_stmt;
6355 IRStmt* st;
6356 MCEnv mce;
6357 IRSB* sb_out;
6358
6359 if (gWordTy != hWordTy) {
6360 /* We don't currently support this case. */
6361 VG_(tool_panic)("host/guest word size mismatch");
6362 }
6363
6364 /* Check we're not completely nuts */
6365 tl_assert(sizeof(UWord) == sizeof(void*));
6366 tl_assert(sizeof(Word) == sizeof(void*));
6367 tl_assert(sizeof(Addr) == sizeof(void*));
6368 tl_assert(sizeof(ULong) == 8);
6369 tl_assert(sizeof(Long) == 8);
6370 tl_assert(sizeof(UInt) == 4);
6371 tl_assert(sizeof(Int) == 4);
6372
6373 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
6374
6375 /* Set up SB */
6376 sb_out = deepCopyIRSBExceptStmts(sb_in);
6377
6378 /* Set up the running environment. Both .sb and .tmpMap are
6379 modified as we go along. Note that tmps are added to both
6380 .sb->tyenv and .tmpMap together, so the valid index-set for
6381 those two arrays should always be identical. */
6382 VG_(memset)(&mce, 0, sizeof(mce));
6383 mce.sb = sb_out;
6384 mce.trace = verboze;
6385 mce.layout = layout;
6386 mce.hWordTy = hWordTy;
6387 mce.bogusLiterals = False;
6388
6389 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
6390 Darwin. 10.7 is mostly built with LLVM, which uses these for
6391 bitfield inserts, and we get a lot of false errors if the cheap
6392 interpretation is used, alas. Could solve this much better if
6393 we knew which of such adds came from x86/amd64 LEA instructions,
6394 since these are the only ones really needing the expensive
6395 interpretation, but that would require some way to tag them in
6396 the _toIR.c front ends, which is a lot of faffing around. So
6397 for now just use the slow and blunt-instrument solution. */
6398 mce.useLLVMworkarounds = False;
6399 # if defined(VGO_darwin)
6400 mce.useLLVMworkarounds = True;
6401 # endif
6402
6403 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
6404 sizeof(TempMapEnt));
6405 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
6406 for (i = 0; i < sb_in->tyenv->types_used; i++) {
6407 TempMapEnt ent;
6408 ent.kind = Orig;
6409 ent.shadowV = IRTemp_INVALID;
6410 ent.shadowB = IRTemp_INVALID;
6411 VG_(addToXA)( mce.tmpMap, &ent );
6412 }
6413 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
6414
6415 if (MC_(clo_expensive_definedness_checks)) {
6416 /* For expensive definedness checking skip looking for bogus
6417 literals. */
6418 mce.bogusLiterals = True;
6419 } else {
6420 /* Make a preliminary inspection of the statements, to see if there
6421 are any dodgy-looking literals. If there are, we generate
6422 extra-detailed (hence extra-expensive) instrumentation in
6423 places. Scan the whole bb even if dodgyness is found earlier,
6424 so that the flatness assertion is applied to all stmts. */
6425 Bool bogus = False;
6426
6427 for (i = 0; i < sb_in->stmts_used; i++) {
6428 st = sb_in->stmts[i];
6429 tl_assert(st);
6430 tl_assert(isFlatIRStmt(st));
6431
6432 if (!bogus) {
6433 bogus = checkForBogusLiterals(st);
6434 if (0 && bogus) {
6435 VG_(printf)("bogus: ");
6436 ppIRStmt(st);
6437 VG_(printf)("\n");
6438 }
6439 if (bogus) break;
6440 }
6441 }
6442 mce.bogusLiterals = bogus;
6443 }
6444
6445 /* Copy verbatim any IR preamble preceding the first IMark */
6446
6447 tl_assert(mce.sb == sb_out);
6448 tl_assert(mce.sb != sb_in);
6449
6450 i = 0;
6451 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
6452
6453 st = sb_in->stmts[i];
6454 tl_assert(st);
6455 tl_assert(isFlatIRStmt(st));
6456
6457 stmt( 'C', &mce, sb_in->stmts[i] );
6458 i++;
6459 }
6460
6461 /* Nasty problem. IR optimisation of the pre-instrumented IR may
6462 cause the IR following the preamble to contain references to IR
6463 temporaries defined in the preamble. Because the preamble isn't
6464 instrumented, these temporaries don't have any shadows.
6465 Nevertheless uses of them following the preamble will cause
6466 memcheck to generate references to their shadows. End effect is
6467 to cause IR sanity check failures, due to references to
6468 non-existent shadows. This is only evident for the complex
6469 preambles used for function wrapping on TOC-afflicted platforms
6470 (ppc64-linux).
6471
6472 The following loop therefore scans the preamble looking for
6473 assignments to temporaries. For each one found it creates an
6474 assignment to the corresponding (V) shadow temp, marking it as
6475 'defined'. This is the same resulting IR as if the main
6476 instrumentation loop before had been applied to the statement
6477 'tmp = CONSTANT'.
6478
6479 Similarly, if origin tracking is enabled, we must generate an
6480 assignment for the corresponding origin (B) shadow, claiming
6481 no-origin, as appropriate for a defined value.
6482 */
6483 for (j = 0; j < i; j++) {
6484 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
6485 /* findShadowTmpV checks its arg is an original tmp;
6486 no need to assert that here. */
6487 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
6488 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
6489 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
6490 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
6491 if (MC_(clo_mc_level) == 3) {
6492 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
6493 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
6494 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
6495 }
6496 if (0) {
6497 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
6498 ppIRType( ty_v );
6499 VG_(printf)("\n");
6500 }
6501 }
6502 }
6503
6504 /* Iterate over the remaining stmts to generate instrumentation. */
6505
6506 tl_assert(sb_in->stmts_used > 0);
6507 tl_assert(i >= 0);
6508 tl_assert(i < sb_in->stmts_used);
6509 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
6510
6511 for (/* use current i*/; i < sb_in->stmts_used; i++) {
6512
6513 st = sb_in->stmts[i];
6514 first_stmt = sb_out->stmts_used;
6515
6516 if (verboze) {
6517 VG_(printf)("\n");
6518 ppIRStmt(st);
6519 VG_(printf)("\n");
6520 }
6521
6522 if (MC_(clo_mc_level) == 3) {
6523 /* See comments on case Ist_CAS below. */
6524 if (st->tag != Ist_CAS)
6525 schemeS( &mce, st );
6526 }
6527
6528 /* Generate instrumentation code for each stmt ... */
6529
6530 switch (st->tag) {
6531
6532 case Ist_WrTmp:
6533 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
6534 expr2vbits( &mce, st->Ist.WrTmp.data) );
6535 break;
6536
6537 case Ist_Put:
6538 do_shadow_PUT( &mce,
6539 st->Ist.Put.offset,
6540 st->Ist.Put.data,
6541 NULL /* shadow atom */, NULL /* guard */ );
6542 break;
6543
6544 case Ist_PutI:
6545 do_shadow_PUTI( &mce, st->Ist.PutI.details);
6546 break;
6547
6548 case Ist_Store:
6549 do_shadow_Store( &mce, st->Ist.Store.end,
6550 st->Ist.Store.addr, 0/* addr bias */,
6551 st->Ist.Store.data,
6552 NULL /* shadow data */,
6553 NULL/*guard*/ );
6554 break;
6555
6556 case Ist_StoreG:
6557 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
6558 break;
6559
6560 case Ist_LoadG:
6561 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
6562 break;
6563
6564 case Ist_Exit:
6565 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
6566 break;
6567
6568 case Ist_IMark:
6569 break;
6570
6571 case Ist_NoOp:
6572 case Ist_MBE:
6573 break;
6574
6575 case Ist_Dirty:
6576 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
6577 break;
6578
6579 case Ist_AbiHint:
6580 do_AbiHint( &mce, st->Ist.AbiHint.base,
6581 st->Ist.AbiHint.len,
6582 st->Ist.AbiHint.nia );
6583 break;
6584
6585 case Ist_CAS:
6586 do_shadow_CAS( &mce, st->Ist.CAS.details );
6587 /* Note, do_shadow_CAS copies the CAS itself to the output
6588 block, because it needs to add instrumentation both
6589 before and after it. Hence skip the copy below. Also
6590 skip the origin-tracking stuff (call to schemeS) above,
6591 since that's all tangled up with it too; do_shadow_CAS
6592 does it all. */
6593 break;
6594
6595 case Ist_LLSC:
6596 do_shadow_LLSC( &mce,
6597 st->Ist.LLSC.end,
6598 st->Ist.LLSC.result,
6599 st->Ist.LLSC.addr,
6600 st->Ist.LLSC.storedata );
6601 break;
6602
6603 default:
6604 VG_(printf)("\n");
6605 ppIRStmt(st);
6606 VG_(printf)("\n");
6607 VG_(tool_panic)("memcheck: unhandled IRStmt");
6608
6609 } /* switch (st->tag) */
6610
6611 if (0 && verboze) {
6612 for (j = first_stmt; j < sb_out->stmts_used; j++) {
6613 VG_(printf)(" ");
6614 ppIRStmt(sb_out->stmts[j]);
6615 VG_(printf)("\n");
6616 }
6617 VG_(printf)("\n");
6618 }
6619
6620 /* ... and finally copy the stmt itself to the output. Except,
6621 skip the copy of IRCASs; see comments on case Ist_CAS
6622 above. */
6623 if (st->tag != Ist_CAS)
6624 stmt('C', &mce, st);
6625 }
6626
6627 /* Now we need to complain if the jump target is undefined. */
6628 first_stmt = sb_out->stmts_used;
6629
6630 if (verboze) {
6631 VG_(printf)("sb_in->next = ");
6632 ppIRExpr(sb_in->next);
6633 VG_(printf)("\n\n");
6634 }
6635
6636 complainIfUndefined( &mce, sb_in->next, NULL );
6637
6638 if (0 && verboze) {
6639 for (j = first_stmt; j < sb_out->stmts_used; j++) {
6640 VG_(printf)(" ");
6641 ppIRStmt(sb_out->stmts[j]);
6642 VG_(printf)("\n");
6643 }
6644 VG_(printf)("\n");
6645 }
6646
6647 /* If this fails, there's been some serious snafu with tmp management,
6648 that should be investigated. */
6649 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
6650 VG_(deleteXA)( mce.tmpMap );
6651
6652 tl_assert(mce.sb == sb_out);
6653 return sb_out;
6654 }
6655
6656
6657 /*------------------------------------------------------------*/
6658 /*--- Post-tree-build final tidying ---*/
6659 /*------------------------------------------------------------*/
6660
6661 /* This exploits the observation that Memcheck often produces
6662 repeated conditional calls of the form
6663
6664 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
6665
6666 with the same guard expression G guarding the same helper call.
6667 The second and subsequent calls are redundant. This usually
6668 results from instrumentation of guest code containing multiple
6669 memory references at different constant offsets from the same base
6670 register. After optimisation of the instrumentation, you get a
6671 test for the definedness of the base register for each memory
6672 reference, which is kinda pointless. MC_(final_tidy) therefore
6673 looks for such repeated calls and removes all but the first. */
6674
6675
/* With some testing on perf/bz2.c, on amd64 and x86, compiled with
   gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
   get almost all the benefits of this transformation whilst causing
   the slide-back case to happen just often enough to be verifiably
   correct.  For posterity, the numbers are:
6681
6682 bz2-32
6683
6684 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
6685 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
6686 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
6687 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
6688 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
6689 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
6690 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
6691 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
6692 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
6693 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
6694 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
6695 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
6696 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
6697
6698 bz2-64
6699
6700 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
6701 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
6702 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
6703 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
6704 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
6705 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
6706 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
6707 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
6708 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
6709 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
6710 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
6711 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
6712 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
6713 */
6714
/* Structs for recording which (helper, guard) pairs we have already
   seen. */

/* Capacity of the pair cache used by MC_(final_tidy).  16 was chosen
   empirically; see the measurement table above. */
#define N_TIDYING_PAIRS 16

/* One previously seen (helper entry point, guard expression) pair. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;

/* Fixed-capacity set of Pairs.  The extra slot past the end is a
   sentinel which MC_(final_tidy) fills with known values and checks
   afterwards, to detect overruns of the array. */
typedef
   struct {
      Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
      UInt pairsUsed;
   }
   Pairs;
6730
6731
6732 /* Return True if e1 and e2 definitely denote the same value (used to
6733 compare guards). Return False if unknown; False is the safe
6734 answer. Since guest registers and guest memory do not have the
6735 SSA property we must return False if any Gets or Loads appear in
6736 the expression. This implicitly assumes that e1 and e2 have the
6737 same IR type, which is always true here -- the type is Ity_I1. */
6738
sameIRValue(IRExpr * e1,IRExpr * e2)6739 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6740 {
6741 if (e1->tag != e2->tag)
6742 return False;
6743 switch (e1->tag) {
6744 case Iex_Const:
6745 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6746 case Iex_Binop:
6747 return e1->Iex.Binop.op == e2->Iex.Binop.op
6748 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6749 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6750 case Iex_Unop:
6751 return e1->Iex.Unop.op == e2->Iex.Unop.op
6752 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6753 case Iex_RdTmp:
6754 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
6755 case Iex_ITE:
6756 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6757 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6758 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
6759 case Iex_Qop:
6760 case Iex_Triop:
6761 case Iex_CCall:
6762 /* be lazy. Could define equality for these, but they never
6763 appear to be used. */
6764 return False;
6765 case Iex_Get:
6766 case Iex_GetI:
6767 case Iex_Load:
6768 /* be conservative - these may not give the same value each
6769 time */
6770 return False;
6771 case Iex_Binder:
6772 /* should never see this */
6773 /* fallthrough */
6774 default:
6775 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6776 ppIRExpr(e1);
6777 VG_(tool_panic)("memcheck:sameIRValue");
6778 return False;
6779 }
6780 }
6781
6782 /* See if 'pairs' already has an entry for (entry, guard). Return
6783 True if so. If not, add an entry. */
6784
6785 static
check_or_add(Pairs * tidyingEnv,IRExpr * guard,void * entry)6786 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
6787 {
6788 UInt i, n = tidyingEnv->pairsUsed;
6789 tl_assert(n <= N_TIDYING_PAIRS);
6790 for (i = 0; i < n; i++) {
6791 if (tidyingEnv->pairs[i].entry == entry
6792 && sameIRValue(tidyingEnv->pairs[i].guard, guard))
6793 return True;
6794 }
6795 /* (guard, entry) wasn't found in the array. Add it at the end.
6796 If the array is already full, slide the entries one slot
6797 backwards. This means we will lose to ability to detect
6798 duplicates from the pair in slot zero, but that happens so
6799 rarely that it's unlikely to have much effect on overall code
6800 quality. Also, this strategy loses the check for the oldest
6801 tracked exit (memory reference, basically) and so that is (I'd
6802 guess) least likely to be re-used after this point. */
6803 tl_assert(i == n);
6804 if (n == N_TIDYING_PAIRS) {
6805 for (i = 1; i < N_TIDYING_PAIRS; i++) {
6806 tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
6807 }
6808 tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
6809 tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
6810 } else {
6811 tl_assert(n < N_TIDYING_PAIRS);
6812 tidyingEnv->pairs[n].entry = entry;
6813 tidyingEnv->pairs[n].guard = guard;
6814 n++;
6815 tidyingEnv->pairsUsed = n;
6816 }
6817 return False;
6818 }
6819
is_helperc_value_checkN_fail(const HChar * name)6820 static Bool is_helperc_value_checkN_fail ( const HChar* name )
6821 {
6822 /* This is expensive because it happens a lot. We are checking to
6823 see whether |name| is one of the following 8 strings:
6824
6825 MC_(helperc_value_check8_fail_no_o)
6826 MC_(helperc_value_check4_fail_no_o)
6827 MC_(helperc_value_check0_fail_no_o)
6828 MC_(helperc_value_check1_fail_no_o)
6829 MC_(helperc_value_check8_fail_w_o)
6830 MC_(helperc_value_check0_fail_w_o)
6831 MC_(helperc_value_check1_fail_w_o)
6832 MC_(helperc_value_check4_fail_w_o)
6833
6834 To speed it up, check the common prefix just once, rather than
6835 all 8 times.
6836 */
6837 const HChar* prefix = "MC_(helperc_value_check";
6838
6839 HChar n, p;
6840 while (True) {
6841 n = *name;
6842 p = *prefix;
6843 if (p == 0) break; /* ran off the end of the prefix */
6844 /* We still have some prefix to use */
6845 if (n == 0) return False; /* have prefix, but name ran out */
6846 if (n != p) return False; /* have both pfx and name, but no match */
6847 name++;
6848 prefix++;
6849 }
6850
6851 /* Check the part after the prefix. */
6852 tl_assert(*prefix == 0 && *name != 0);
6853 return 0==VG_(strcmp)(name, "8_fail_no_o)")
6854 || 0==VG_(strcmp)(name, "4_fail_no_o)")
6855 || 0==VG_(strcmp)(name, "0_fail_no_o)")
6856 || 0==VG_(strcmp)(name, "1_fail_no_o)")
6857 || 0==VG_(strcmp)(name, "8_fail_w_o)")
6858 || 0==VG_(strcmp)(name, "4_fail_w_o)")
6859 || 0==VG_(strcmp)(name, "0_fail_w_o)")
6860 || 0==VG_(strcmp)(name, "1_fail_w_o)");
6861 }
6862
MC_(final_tidy)6863 IRSB* MC_(final_tidy) ( IRSB* sb_in )
6864 {
6865 Int i;
6866 IRStmt* st;
6867 IRDirty* di;
6868 IRExpr* guard;
6869 IRCallee* cee;
6870 Bool alreadyPresent;
6871 Pairs pairs;
6872
6873 pairs.pairsUsed = 0;
6874
6875 pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
6876 pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
6877
6878 /* Scan forwards through the statements. Each time a call to one
6879 of the relevant helpers is seen, check if we have made a
6880 previous call to the same helper using the same guard
6881 expression, and if so, delete the call. */
6882 for (i = 0; i < sb_in->stmts_used; i++) {
6883 st = sb_in->stmts[i];
6884 tl_assert(st);
6885 if (st->tag != Ist_Dirty)
6886 continue;
6887 di = st->Ist.Dirty.details;
6888 guard = di->guard;
6889 tl_assert(guard);
6890 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
6891 cee = di->cee;
6892 if (!is_helperc_value_checkN_fail( cee->name ))
6893 continue;
6894 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
6895 guard 'guard'. Check if we have already seen a call to this
6896 function with the same guard. If so, delete it. If not,
6897 add it to the set of calls we do know about. */
6898 alreadyPresent = check_or_add( &pairs, guard, cee->addr );
6899 if (alreadyPresent) {
6900 sb_in->stmts[i] = IRStmt_NoOp();
6901 if (0) VG_(printf)("XX\n");
6902 }
6903 }
6904
6905 tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
6906 tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
6907
6908 return sb_in;
6909 }
6910
6911 #undef N_TIDYING_PAIRS
6912
6913
6914 /*------------------------------------------------------------*/
6915 /*--- Origin tracking stuff ---*/
6916 /*------------------------------------------------------------*/
6917
6918 /* Almost identical to findShadowTmpV. */
findShadowTmpB(MCEnv * mce,IRTemp orig)6919 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6920 {
6921 TempMapEnt* ent;
6922 /* VG_(indexXA) range-checks 'orig', hence no need to check
6923 here. */
6924 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6925 tl_assert(ent->kind == Orig);
6926 if (ent->shadowB == IRTemp_INVALID) {
6927 IRTemp tmpB
6928 = newTemp( mce, Ity_I32, BSh );
6929 /* newTemp may cause mce->tmpMap to resize, hence previous results
6930 from VG_(indexXA) are invalid. */
6931 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6932 tl_assert(ent->kind == Orig);
6933 tl_assert(ent->shadowB == IRTemp_INVALID);
6934 ent->shadowB = tmpB;
6935 }
6936 return ent->shadowB;
6937 }
6938
/* Emit code computing the unsigned max of two origin tags (both
   Ity_I32) and return the resulting atom. */
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   IRAtom* mx = binop(Iop_Max32U, b1, b2);
   return assignNew( 'B', mce, Ity_I32, mx );
}
6943
6944
6945 /* Make a guarded origin load, with no special handling in the
6946 didn't-happen case. A GUARD of NULL is assumed to mean "always
6947 True".
6948
6949 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6950 return the otag. The loaded size is SZB. If GUARD evaluates to
6951 False at run time then the returned otag is zero.
6952 */
gen_guarded_load_b(MCEnv * mce,Int szB,IRAtom * baseaddr,Int offset,IRExpr * guard)6953 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6954 IRAtom* baseaddr,
6955 Int offset, IRExpr* guard )
6956 {
6957 void* hFun;
6958 const HChar* hName;
6959 IRTemp bTmp;
6960 IRDirty* di;
6961 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6962 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6963 IRAtom* ea = baseaddr;
6964 if (offset != 0) {
6965 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6966 : mkU64( (Long)(Int)offset );
6967 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6968 }
6969 bTmp = newTemp(mce, mce->hWordTy, BSh);
6970
6971 switch (szB) {
6972 case 1: hFun = (void*)&MC_(helperc_b_load1);
6973 hName = "MC_(helperc_b_load1)";
6974 break;
6975 case 2: hFun = (void*)&MC_(helperc_b_load2);
6976 hName = "MC_(helperc_b_load2)";
6977 break;
6978 case 4: hFun = (void*)&MC_(helperc_b_load4);
6979 hName = "MC_(helperc_b_load4)";
6980 break;
6981 case 8: hFun = (void*)&MC_(helperc_b_load8);
6982 hName = "MC_(helperc_b_load8)";
6983 break;
6984 case 16: hFun = (void*)&MC_(helperc_b_load16);
6985 hName = "MC_(helperc_b_load16)";
6986 break;
6987 case 32: hFun = (void*)&MC_(helperc_b_load32);
6988 hName = "MC_(helperc_b_load32)";
6989 break;
6990 default:
6991 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6992 tl_assert(0);
6993 }
6994 di = unsafeIRDirty_1_N(
6995 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6996 mkIRExprVec_1( ea )
6997 );
6998 if (guard) {
6999 di->guard = guard;
7000 /* Ideally the didn't-happen return value here would be
7001 all-zeroes (unknown-origin), so it'd be harmless if it got
7002 used inadvertently. We slum it out with the IR-mandated
7003 default value (0b01 repeating, 0x55 etc) as that'll probably
7004 trump all legitimate otags via Max32, and it's pretty
7005 obviously bogus. */
7006 }
7007 /* no need to mess with any annotations. This call accesses
7008 neither guest state nor guest memory. */
7009 stmt( 'B', mce, IRStmt_Dirty(di) );
7010 if (mce->hWordTy == Ity_I64) {
7011 /* 64-bit host */
7012 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
7013 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
7014 return mkexpr(bTmp32);
7015 } else {
7016 /* 32-bit host */
7017 return mkexpr(bTmp);
7018 }
7019 }
7020
7021
7022 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
7023 loaded size is SZB. The load is regarded as unconditional (always
7024 happens).
7025 */
gen_load_b(MCEnv * mce,Int szB,IRAtom * baseaddr,Int offset)7026 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
7027 Int offset )
7028 {
7029 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
7030 }
7031
7032
7033 /* The most general handler for guarded origin loads. A GUARD of NULL
7034 is assumed to mean "always True".
7035
7036 Generate IR to do a shadow origin load from ADDR+BIAS and return
7037 the B bits. The loaded type is TY. If GUARD evaluates to False at
7038 run time then the returned B bits are simply BALT instead.
7039 */
7040 static
expr2ori_Load_guarded_General(MCEnv * mce,IRType ty,IRAtom * addr,UInt bias,IRAtom * guard,IRAtom * balt)7041 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
7042 IRType ty,
7043 IRAtom* addr, UInt bias,
7044 IRAtom* guard, IRAtom* balt )
7045 {
7046 /* If the guard evaluates to True, this will hold the loaded
7047 origin. If the guard evaluates to False, this will be zero,
7048 meaning "unknown origin", in which case we will have to replace
7049 it using an ITE below. */
7050 IRAtom* iftrue
7051 = assignNew('B', mce, Ity_I32,
7052 gen_guarded_load_b(mce, sizeofIRType(ty),
7053 addr, bias, guard));
7054 /* These are the bits we will return if the load doesn't take
7055 place. */
7056 IRAtom* iffalse
7057 = balt;
7058 /* Prepare the cond for the ITE. Convert a NULL cond into
7059 something that iropt knows how to fold out later. */
7060 IRAtom* cond
7061 = guard == NULL ? mkU1(1) : guard;
7062 /* And assemble the final result. */
7063 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
7064 }
7065
7066
7067 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
7068 the store really happens; NULL means it unconditionally does. */
gen_store_b(MCEnv * mce,Int szB,IRAtom * baseaddr,Int offset,IRAtom * dataB,IRAtom * guard)7069 static void gen_store_b ( MCEnv* mce, Int szB,
7070 IRAtom* baseaddr, Int offset, IRAtom* dataB,
7071 IRAtom* guard )
7072 {
7073 void* hFun;
7074 const HChar* hName;
7075 IRDirty* di;
7076 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
7077 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
7078 IRAtom* ea = baseaddr;
7079 if (guard) {
7080 tl_assert(isOriginalAtom(mce, guard));
7081 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
7082 }
7083 if (offset != 0) {
7084 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
7085 : mkU64( (Long)(Int)offset );
7086 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
7087 }
7088 if (mce->hWordTy == Ity_I64)
7089 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
7090
7091 switch (szB) {
7092 case 1: hFun = (void*)&MC_(helperc_b_store1);
7093 hName = "MC_(helperc_b_store1)";
7094 break;
7095 case 2: hFun = (void*)&MC_(helperc_b_store2);
7096 hName = "MC_(helperc_b_store2)";
7097 break;
7098 case 4: hFun = (void*)&MC_(helperc_b_store4);
7099 hName = "MC_(helperc_b_store4)";
7100 break;
7101 case 8: hFun = (void*)&MC_(helperc_b_store8);
7102 hName = "MC_(helperc_b_store8)";
7103 break;
7104 case 16: hFun = (void*)&MC_(helperc_b_store16);
7105 hName = "MC_(helperc_b_store16)";
7106 break;
7107 case 32: hFun = (void*)&MC_(helperc_b_store32);
7108 hName = "MC_(helperc_b_store32)";
7109 break;
7110 default:
7111 tl_assert(0);
7112 }
7113 di = unsafeIRDirty_0_N( 2/*regparms*/,
7114 hName, VG_(fnptr_to_fnentry)( hFun ),
7115 mkIRExprVec_2( ea, dataB )
7116 );
7117 /* no need to mess with any annotations. This call accesses
7118 neither guest state nor guest memory. */
7119 if (guard) di->guard = guard;
7120 stmt( 'B', mce, IRStmt_Dirty(di) );
7121 }
7122
/* Narrow an origin value of type I32 or I64 down to I32; anything
   else is a hard error. */
static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType srcTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (srcTy == Ity_I32)
      return e;                  /* already the right width */
   if (srcTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   tl_assert(0);
}
7131
/* Zero-widen an I32 origin value to dstTy; only I64 is supported as a
   destination, anything else is a hard error. */
static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType srcTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(srcTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   tl_assert(0);
}
7139
7140
/* Compute an origin tag (a B value :: Ity_I32) for expression 'e',
   emitting any IR needed via 'mce'.  The general scheme: an
   expression's origin is the Max32U of the origins of its inputs,
   with the special cases handled below.  Only used when origin
   tracking is enabled (clo_mc_level == 3). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B shadow of the guest state is at offset
            2 * total_sizeB from the real guest state (same layout
            convention as the Iex_Get case below). */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         /* All three arms contribute: the condition's origin matters
            as much as the selected value's. */
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* A unop's origin is just its argument's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants carry no interesting origin: otag zero. */
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* get_otrack_shadow_offset returns -1 when this guest state
            slice has no origin shadow; treat that as otag zero. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
7275
7276
/* Generate origin-tracking IR for a dirty helper call.  A hacked
   version of do_shadow_Dirty: compute the Max32U of the origins of
   all inputs (guard, unmasked args, guest state read, memory read),
   then propagate that single combined otag to every output (the
   destination temp, guest state written, memory written). */
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int i, k, n, toDo, gSz, gOff;
   IRAtom *here, *curr;
   IRTemp dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue = assignNew( 'B', mce, Ity_I32,
                                   IRExpr_Get(b_offset
                                                 + 2*mce->layout->total_sizeB,
                                              Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      /* NOTE(review): the category char here is 'V' even though this
         is a B (origin) assignment; the char appears to be used only
         for debug printing of the generated IR -- confirm before
         changing. */
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond = assignNew('B', mce, Ity_I1,
                                d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                                 2*mce->layout->total_sizeB,
                                              Ity_I32));
               /* NOTE(review): 'V' category char on a B-value
                  assignment, as above -- presumed cosmetic. */
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
7480
7481
7482 /* Generate IR for origin shadowing for a general guarded store. */
do_origins_Store_guarded(MCEnv * mce,IREndness stEnd,IRExpr * stAddr,IRExpr * stData,IRExpr * guard)7483 static void do_origins_Store_guarded ( MCEnv* mce,
7484 IREndness stEnd,
7485 IRExpr* stAddr,
7486 IRExpr* stData,
7487 IRExpr* guard )
7488 {
7489 Int dszB;
7490 IRAtom* dataB;
7491 /* assert that the B value for the address is already available
7492 (somewhere), since the call to schemeE will want to see it.
7493 XXXX how does this actually ensure that?? */
7494 tl_assert(isIRAtom(stAddr));
7495 tl_assert(isIRAtom(stData));
7496 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7497 dataB = schemeE( mce, stData );
7498 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7499 }
7500
7501
7502 /* Generate IR for origin shadowing for a plain store. */
do_origins_Store_plain(MCEnv * mce,IREndness stEnd,IRExpr * stAddr,IRExpr * stData)7503 static void do_origins_Store_plain ( MCEnv* mce,
7504 IREndness stEnd,
7505 IRExpr* stAddr,
7506 IRExpr* stData )
7507 {
7508 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7509 NULL/*guard*/ );
7510 }
7511
7512
7513 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7514
/* Origin shadowing for a guarded store: simply forward the store's
   fields to the general guarded-store case. */
static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr, sg->data,
                             sg->guard );
}
7520
/* Origin shadowing for a guarded load.  The result's origin is derived
   from the loaded value's origin and (if the guard is false) the
   origin of the alternative value. */
static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   /* Determine the type actually read from memory, before any
      widening conversion is applied. */
   IRType tyLoaded = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_IdentV128: tyLoaded = Ity_V128; break;
      case ILGop_Ident64:   tyLoaded = Ity_I64;  break;
      case ILGop_Ident32:   tyLoaded = Ity_I32;  break;
      case ILGop_16Uto32:
      case ILGop_16Sto32:   tyLoaded = Ity_I16;  break;
      case ILGop_8Uto32:
      case ILGop_8Sto32:    tyLoaded = Ity_I8;   break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   /* Origin of the alternative (guard-false) value. */
   IRAtom* bAlt
      = schemeE( mce, lg->alt );
   /* Combine the guarded load's origin with the alternative's; see
      expr2ori_Load_guarded_General for the details. */
   IRAtom* bFinal
      = expr2ori_Load_guarded_General(mce, tyLoaded,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, bAlt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), bFinal );
}
7543
7544
/* Generate origin-tracking ("B" shadow) instrumentation for one
   statement.  Only reached when origin tracking is enabled
   (MC_(clo_mc_level) == 3, i.e. --track-origins=yes).  Statements with
   no origin-relevant effect (MBE, NoOp, Exit, IMark) are ignored;
   anything unrecognised is a panic. */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow guest-state area lives at an offset of
            2 * total_sizeB from the real guest state (same layout
            convention as the Put case below). */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         /* Dirty helper calls: handled separately, since they may
            read/write both guest state and memory. */
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined.  Origin 0 means "no origin". */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* A negative b_offset means this guest-state slice has no
            origin shadow; in that case generate no code. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         /* Bind the origin of the RHS expression to the temporary's
            B shadow. */
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No origin-tracking consequences. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
7666
7667
7668 /*------------------------------------------------------------*/
7669 /*--- Startup assertion checking ---*/
7670 /*------------------------------------------------------------*/
7671
/* Sanity-check, at startup, that is_helperc_value_checkN_fail
   classifies helper names as expected: it must recognise exactly the
   eight value-check failure helpers and nothing else. */
void MC_(do_instrumentation_startup_checks)( void )
{
   /* Make a best-effort check to see that is_helperc_value_checkN_fail
      is working as we expect. */

#  define CHECK(_expected, _string) \
      tl_assert((_expected) == is_helperc_value_checkN_fail(_string))

   /* It should identify these 8, and no others, as targets. */
   CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_w_o)");

   /* Ad-hoc selection of other strings gathered via a quick test.
      None of these are value-check failure helpers, so all must be
      rejected. */
   CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
   CHECK(False, "amd64g_dirtyhelper_RDTSC");
   CHECK(False, "MC_(helperc_b_load1)");
   CHECK(False, "MC_(helperc_b_load2)");
   CHECK(False, "MC_(helperc_b_load4)");
   CHECK(False, "MC_(helperc_b_load8)");
   CHECK(False, "MC_(helperc_b_load16)");
   CHECK(False, "MC_(helperc_b_load32)");
   CHECK(False, "MC_(helperc_b_store1)");
   CHECK(False, "MC_(helperc_b_store2)");
   CHECK(False, "MC_(helperc_b_store4)");
   CHECK(False, "MC_(helperc_b_store8)");
   CHECK(False, "MC_(helperc_b_store16)");
   CHECK(False, "MC_(helperc_b_store32)");
   CHECK(False, "MC_(helperc_LOADV8)");
   CHECK(False, "MC_(helperc_LOADV16le)");
   CHECK(False, "MC_(helperc_LOADV32le)");
   CHECK(False, "MC_(helperc_LOADV64le)");
   CHECK(False, "MC_(helperc_LOADV128le)");
   CHECK(False, "MC_(helperc_LOADV256le)");
   CHECK(False, "MC_(helperc_STOREV16le)");
   CHECK(False, "MC_(helperc_STOREV32le)");
   CHECK(False, "MC_(helperc_STOREV64le)");
   CHECK(False, "MC_(helperc_STOREV8)");
   CHECK(False, "track_die_mem_stack_8");
   CHECK(False, "track_new_mem_stack_8_w_ECU");
   CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
   CHECK(False, "VG_(unknown_SP_update_w_ECU)");

#  undef CHECK
}
7722
7723
7724 /*--------------------------------------------------------------------*/
7725 /*--- end mc_translate.c ---*/
7726 /*--------------------------------------------------------------------*/
7727