1
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
10
11 Copyright (C) 2000-2012 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h" // For mc_include.h
34 #include "pub_tool_hashtable.h" // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
42
43 #include "mc_include.h"
44
45
46 /* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are overly
50 pessimistic and/or wrong.
51 */
52
53 /* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
65
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
106 historical reasons. [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
120 */
121
122 /*------------------------------------------------------------*/
123 /*--- Forward decls ---*/
124 /*------------------------------------------------------------*/
125
126 struct _MCEnv;
127
128 static IRType shadowTypeV ( IRType ty );
129 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
130 static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
131
132 static IRExpr *i128_const_zero(void);
133
134 /*------------------------------------------------------------*/
135 /*--- Memcheck running state, and tmp management. ---*/
136 /*------------------------------------------------------------*/
137
138 /* Carries info about a particular tmp. The tmp's number is not
139 recorded, as this is implied by (equal to) its index in the tmpMap
140 in MCEnv. The tmp's type is also not recorded, as this is present
141 in MCEnv.sb->tyenv.
142
143 When .kind is Orig, .shadowV and .shadowB may give the identities
144 of the temps currently holding the associated definedness (shadowV)
145 and origin (shadowB) values, or these may be IRTemp_INVALID if code
146 to compute such values has not yet been emitted.
147
148 When .kind is VSh or BSh then the tmp holds a V- or B- value,
149 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150 illogical for a shadow tmp itself to be shadowed.
151 */
152 typedef
153 enum { Orig=1, VSh=2, BSh=3 }
154 TempKind;
155
156 typedef
157 struct {
158 TempKind kind;
159 IRTemp shadowV;
160 IRTemp shadowB;
161 }
162 TempMapEnt;
163
164
165 /* Carries around state during memcheck instrumentation. */
166 typedef
167 struct _MCEnv {
168 /* MODIFIED: the superblock being constructed. IRStmts are
169 added. */
170 IRSB* sb;
171 Bool trace;
172
173 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
174 current kind and possibly shadow temps for each temp in the
175 IRSB being constructed. Note that it does not contain the
176 type of each tmp. If you want to know the type, look at the
177 relevant entry in sb->tyenv. It follows that at all times
178 during the instrumentation process, the valid indices for
179 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
180 the total number of Orig, V- and B- temps allocated so far.
181
182 The reason for this strange split (types in one place, all
183 other info in another) is that we need the types to be
184 attached to sb so as to make it possible to do
185 "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
186 instrumentation process. */
187 XArray* /* of TempMapEnt */ tmpMap;
188
189 /* MODIFIED: indicates whether "bogus" literals have so far been
190 found. Starts off False, and may change to True. */
191 Bool bogusLiterals;
192
193 /* READONLY: indicates whether we should use expensive
194 interpretations of integer adds, since unfortunately LLVM
195 uses them to do ORs in some circumstances. Defaulted to True
196 on MacOS and False everywhere else. */
197 Bool useLLVMworkarounds;
198
199 /* READONLY: the guest layout. This indicates which parts of
200 the guest state should be regarded as 'always defined'. */
201 VexGuestLayout* layout;
202
203 /* READONLY: the host word type. Needed for constructing
204 arguments of type 'HWord' to be passed to helper functions.
205 Ity_I32 or Ity_I64 only. */
206 IRType hWordTy;
207 }
208 MCEnv;
209
210 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
211 demand), as they are encountered. This is for two reasons.
212
213 (1) (less important reason): Many original tmps are unused due to
214 initial IR optimisation, and we do not want to waste space in tables
215 tracking them.
216
217 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
218 table indexed [0 .. n_temps-1], which gives the current shadow for
219 each original tmp, or IRTemp_INVALID if none is so far assigned.
220 It is necessary to support making multiple assignments to a shadow
221 -- specifically, after testing a shadow for definedness, it needs
222 to be made defined. But IR's SSA property disallows this.
223
224 (2) (more important reason): Therefore, when a shadow needs to get
225 a new value, a new temporary is created, the value is assigned to
226 that, and the tmpMap is updated to reflect the new binding.
227
228 A corollary is that if the tmpMap maps a given tmp to
229 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
230 there's a read-before-write error in the original tmps. The IR
231 sanity checker should catch all such anomalies, however.
232 */
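/* Illustrative walk-through (the tmp numbers here are made up): the
   first call to findShadowTmpV(mce, t5) allocates, say, t17 and records
   it as t5's .shadowV.  If t5 is later tested by complainIfUndefined,
   its shadow must then be forced to 'defined'; t17 cannot be assigned
   a second time (SSA), so newShadowTmpV allocates a fresh tmp, say t23,
   rebinds t5's .shadowV to t23, and the 'defined' value is written to
   t23.  Subsequent uses of t5's shadow then read t23. */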
233
234 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235 both the table in mce->sb and to our auxiliary mapping. Note that
236 newTemp may cause mce->tmpMap to resize, hence previous results
237 from VG_(indexXA)(mce->tmpMap) are invalidated. */
238 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239 {
240 Word newIx;
241 TempMapEnt ent;
242 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
243 ent.kind = kind;
244 ent.shadowV = IRTemp_INVALID;
245 ent.shadowB = IRTemp_INVALID;
246 newIx = VG_(addToXA)( mce->tmpMap, &ent );
247 tl_assert(newIx == (Word)tmp);
248 return tmp;
249 }
250
251
252 /* Find the tmp currently shadowing the given original tmp. If none
253 so far exists, allocate one. */
254 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
255 {
256 TempMapEnt* ent;
257 /* VG_(indexXA) range-checks 'orig', hence no need to check
258 here. */
259 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260 tl_assert(ent->kind == Orig);
261 if (ent->shadowV == IRTemp_INVALID) {
262 IRTemp tmpV
263 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264 /* newTemp may cause mce->tmpMap to resize, hence previous results
265 from VG_(indexXA) are invalid. */
266 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267 tl_assert(ent->kind == Orig);
268 tl_assert(ent->shadowV == IRTemp_INVALID);
269 ent->shadowV = tmpV;
270 }
271 return ent->shadowV;
272 }
273
274 /* Allocate a new shadow for the given original tmp. This means any
275 previous shadow is abandoned. This is needed because it is
276 necessary to give a new value to a shadow once it has been tested
277 for undefinedness, but unfortunately IR's SSA property disallows
278 this. Instead we must abandon the old shadow, allocate a new one
279 and use that instead.
280
281 This is the same as findShadowTmpV, except we don't bother to see
282 if a shadow temp already existed -- we simply allocate a new one
283 regardless. */
284 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
285 {
286 TempMapEnt* ent;
287 /* VG_(indexXA) range-checks 'orig', hence no need to check
288 here. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 if (1) {
292 IRTemp tmpV
293 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294 /* newTemp may cause mce->tmpMap to resize, hence previous results
295 from VG_(indexXA) are invalid. */
296 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297 tl_assert(ent->kind == Orig);
298 ent->shadowV = tmpV;
299 }
300 }
301
302
303 /*------------------------------------------------------------*/
304 /*--- IRAtoms -- a subset of IRExprs ---*/
305 /*------------------------------------------------------------*/
306
307 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
308 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
309 input, most of this code deals in atoms. Usefully, a value atom
310 always has a V-value which is also an atom: constants are shadowed
311 by constants, and temps are shadowed by the corresponding shadow
312 temporary. */
313
314 typedef IRExpr IRAtom;
315
316 /* (used for sanity checks only): is this an atom which looks
317 like it's from original code? */
318 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319 {
320 if (a1->tag == Iex_Const)
321 return True;
322 if (a1->tag == Iex_RdTmp) {
323 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324 return ent->kind == Orig;
325 }
326 return False;
327 }
328
329 /* (used for sanity checks only): is this an atom which looks
330 like it's from shadow code? */
331 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332 {
333 if (a1->tag == Iex_Const)
334 return True;
335 if (a1->tag == Iex_RdTmp) {
336 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337 return ent->kind == VSh || ent->kind == BSh;
338 }
339 return False;
340 }
341
342 /* (used for sanity checks only): check that both args are atoms and
343 are identically-kinded. */
344 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345 {
346 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
347 return True;
348 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
349 return True;
350 return False;
351 }
352
353
354 /*------------------------------------------------------------*/
355 /*--- Type management ---*/
356 /*------------------------------------------------------------*/
357
358 /* Shadow state is always accessed using integer types. This returns
359 an integer type with the same size (as per sizeofIRType) as the
360 given type. The only valid shadow types are Bit, I8, I16, I32,
361 I64, I128, V128, V256. */
362
363 static IRType shadowTypeV ( IRType ty )
364 {
365 switch (ty) {
366 case Ity_I1:
367 case Ity_I8:
368 case Ity_I16:
369 case Ity_I32:
370 case Ity_I64:
371 case Ity_I128: return ty;
372 case Ity_F32: return Ity_I32;
373 case Ity_D32: return Ity_I32;
374 case Ity_F64: return Ity_I64;
375 case Ity_D64: return Ity_I64;
376 case Ity_F128: return Ity_I128;
377 case Ity_D128: return Ity_I128;
378 case Ity_V128: return Ity_V128;
379 case Ity_V256: return Ity_V256;
380 default: ppIRType(ty);
381 VG_(tool_panic)("memcheck:shadowTypeV");
382 }
383 }
384
385 /* Produce a 'defined' value of the given shadow type. Should only be
386 supplied shadow types (I1/I8/I16/I32/I64/I128/V128). */
387 static IRExpr* definedOfType ( IRType ty ) {
388 switch (ty) {
389 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
390 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
391 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
392 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
393 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
394 case Ity_I128: return i128_const_zero();
395 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
396 default: VG_(tool_panic)("memcheck:definedOfType");
397 }
398 }
399
400
401 /*------------------------------------------------------------*/
402 /*--- Constructing IR fragments ---*/
403 /*------------------------------------------------------------*/
404
405 /* add stmt to a bb */
406 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
407 if (mce->trace) {
408 VG_(printf)(" %c: ", cat);
409 ppIRStmt(st);
410 VG_(printf)("\n");
411 }
412 addStmtToIRSB(mce->sb, st);
413 }
414
415 /* assign value to tmp */
416 static inline
417 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
418 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
419 }
420
421 /* build various kinds of expressions */
422 #define triop(_op, _arg1, _arg2, _arg3) \
423 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
424 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
426 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
427 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
428 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
429 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
430 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
431 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
432
433 /* Bind the given expression to a new temporary, and return the
434 temporary. This effectively converts an arbitrary expression into
435 an atom.
436
437 'ty' is the type of 'e' and hence the type that the new temporary
438 needs to be. But passing it in is redundant, since we can deduce
439 the type merely by inspecting 'e'. So at least use that fact to
440 assert that the two types agree. */
441 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442 {
443 TempKind k;
444 IRTemp t;
445 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
446
447 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
448 switch (cat) {
449 case 'V': k = VSh; break;
450 case 'B': k = BSh; break;
451 case 'C': k = Orig; break;
452 /* happens when we are making up new "orig"
453 expressions, for IRCAS handling */
454 default: tl_assert(0);
455 }
456 t = newTemp(mce, ty, k);
457 assign(cat, mce, t, e);
458 return mkexpr(t);
459 }
460
461
462 /*------------------------------------------------------------*/
463 /*--- Helper functions for 128-bit ops ---*/
464 /*------------------------------------------------------------*/
465
466 static IRExpr *i128_const_zero(void)
467 {
468 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469 return binop(Iop_64HLto128, z64, z64);
470 }
471
472 /* There are no I128 loads or stores [as generated by any
473 current front ends]. So we do not need to worry about that in
474 expr2vbits_Load. */
475
476
477 /*------------------------------------------------------------*/
478 /*--- Constructing definedness primitive ops ---*/
479 /*------------------------------------------------------------*/
480
481 /* --------- Defined-if-either-defined --------- */
482
483 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484 tl_assert(isShadowAtom(mce,a1));
485 tl_assert(isShadowAtom(mce,a2));
486 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
487 }
488
489 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
492 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
493 }
494
495 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
498 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
499 }
500
501 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
504 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
505 }
506
507 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
510 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
511 }
512
513 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
516 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517 }
518
519 /* --------- Undefined-if-either-undefined --------- */
520
521 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522 tl_assert(isShadowAtom(mce,a1));
523 tl_assert(isShadowAtom(mce,a2));
524 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
525 }
526
527 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
530 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
531 }
532
533 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
536 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
537 }
538
539 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
542 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
543 }
544
545 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547 tl_assert(isShadowAtom(mce,a1));
548 tl_assert(isShadowAtom(mce,a2));
549 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557 }
558
559 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
560 tl_assert(isShadowAtom(mce,a1));
561 tl_assert(isShadowAtom(mce,a2));
562 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
563 }
564
565 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
568 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569 }
570
571 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
572 switch (vty) {
573 case Ity_I8: return mkUifU8(mce, a1, a2);
574 case Ity_I16: return mkUifU16(mce, a1, a2);
575 case Ity_I32: return mkUifU32(mce, a1, a2);
576 case Ity_I64: return mkUifU64(mce, a1, a2);
577 case Ity_I128: return mkUifU128(mce, a1, a2);
578 case Ity_V128: return mkUifUV128(mce, a1, a2);
579 default:
580 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581 VG_(tool_panic)("memcheck:mkUifU");
582 }
583 }
584
585 /* --------- The Left-family of operations. --------- */
586
587 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588 tl_assert(isShadowAtom(mce,a1));
589 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
590 }
591
592 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593 tl_assert(isShadowAtom(mce,a1));
594 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
595 }
596
597 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598 tl_assert(isShadowAtom(mce,a1));
599 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
600 }
601
602 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603 tl_assert(isShadowAtom(mce,a1));
604 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
605 }
606
607 /* --------- 'Improvement' functions for AND/OR. --------- */
608
609 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
610 defined (0); all other -> undefined (1).
611 */
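/* Worked example (I8, purely illustrative values): data = 0x0F with all
   bits defined (vbits = 0x00) gives ImproveAND8 = 0x0F | 0x00 = 0x0F.
   Read as V bits, the four 0s in the top half say "these result bits
   are known defined", which is right: AND with a defined 0 yields a
   defined 0 regardless of the other operand.  The caller DifDs (ANDs)
   this term onto the naive UifU result to recover that precision. */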
612 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
613 {
614 tl_assert(isOriginalAtom(mce, data));
615 tl_assert(isShadowAtom(mce, vbits));
616 tl_assert(sameKindedAtoms(data, vbits));
617 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
618 }
619
620 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621 {
622 tl_assert(isOriginalAtom(mce, data));
623 tl_assert(isShadowAtom(mce, vbits));
624 tl_assert(sameKindedAtoms(data, vbits));
625 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
626 }
627
628 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629 {
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
633 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
634 }
635
636 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637 {
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
641 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
642 }
643
644 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645 {
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
649 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
650 }
651
652 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653 {
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658 }
659
660 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
661 defined (0); all other -> undefined (1).
662 */
663 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664 {
665 tl_assert(isOriginalAtom(mce, data));
666 tl_assert(isShadowAtom(mce, vbits));
667 tl_assert(sameKindedAtoms(data, vbits));
668 return assignNew(
669 'V', mce, Ity_I8,
670 binop(Iop_Or8,
671 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
672 vbits) );
673 }
674
675 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676 {
677 tl_assert(isOriginalAtom(mce, data));
678 tl_assert(isShadowAtom(mce, vbits));
679 tl_assert(sameKindedAtoms(data, vbits));
680 return assignNew(
681 'V', mce, Ity_I16,
682 binop(Iop_Or16,
683 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
684 vbits) );
685 }
686
687 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688 {
689 tl_assert(isOriginalAtom(mce, data));
690 tl_assert(isShadowAtom(mce, vbits));
691 tl_assert(sameKindedAtoms(data, vbits));
692 return assignNew(
693 'V', mce, Ity_I32,
694 binop(Iop_Or32,
695 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
696 vbits) );
697 }
698
699 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700 {
701 tl_assert(isOriginalAtom(mce, data));
702 tl_assert(isShadowAtom(mce, vbits));
703 tl_assert(sameKindedAtoms(data, vbits));
704 return assignNew(
705 'V', mce, Ity_I64,
706 binop(Iop_Or64,
707 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
708 vbits) );
709 }
710
711 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
712 {
713 tl_assert(isOriginalAtom(mce, data));
714 tl_assert(isShadowAtom(mce, vbits));
715 tl_assert(sameKindedAtoms(data, vbits));
716 return assignNew(
717 'V', mce, Ity_V128,
718 binop(Iop_OrV128,
719 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
720 vbits) );
721 }
722
723 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724 {
725 tl_assert(isOriginalAtom(mce, data));
726 tl_assert(isShadowAtom(mce, vbits));
727 tl_assert(sameKindedAtoms(data, vbits));
728 return assignNew(
729 'V', mce, Ity_V256,
730 binop(Iop_OrV256,
731 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732 vbits) );
733 }
734
735 /* --------- Pessimising casts. --------- */
736
737 /* The function returns an expression of type DST_TY. If any of the VBITS
738 is undefined (value == 1) the resulting expression has all bits set to
739 1. Otherwise, all bits are 0. */
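/* For example (illustrative values): PCasting an I8 shadow to I32 maps
   0x00 (all bits defined) to 0x00000000, and 0x40 (one undefined bit)
   to 0xFFFFFFFF -- a single undefined input bit poisons every bit of
   the result. */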
740
741 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
742 {
743 IRType src_ty;
744 IRAtom* tmp1;
745
746 /* Note, dst_ty is a shadow type, not an original type. */
747 tl_assert(isShadowAtom(mce,vbits));
748 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
749
750 /* Fast-track some common cases */
751 if (src_ty == Ity_I32 && dst_ty == Ity_I32)
752 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
753
754 if (src_ty == Ity_I64 && dst_ty == Ity_I64)
755 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
756
757 if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
758 /* PCast the arg, then clone it. */
759 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
760 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
761 }
762
763 if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
764 /* PCast the arg. This gives all 0s or all 1s. Then throw away
765 the top half. */
766 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
767 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
768 }
769
770 /* Else do it the slow way .. */
771 /* First of all, collapse vbits down to a single bit. */
772 tmp1 = NULL;
773 switch (src_ty) {
774 case Ity_I1:
775 tmp1 = vbits;
776 break;
777 case Ity_I8:
778 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
779 break;
780 case Ity_I16:
781 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
782 break;
783 case Ity_I32:
784 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
785 break;
786 case Ity_I64:
787 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
788 break;
789 case Ity_I128: {
790 /* Gah. Chop it in half, OR the halves together, and compare
791 that with zero. */
792 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
793 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
794 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
795 tmp1 = assignNew('V', mce, Ity_I1,
796 unop(Iop_CmpNEZ64, tmp4));
797 break;
798 }
799 default:
800 ppIRType(src_ty);
801 VG_(tool_panic)("mkPCastTo(1)");
802 }
803 tl_assert(tmp1);
804 /* Now widen up to the dst type. */
805 switch (dst_ty) {
806 case Ity_I1:
807 return tmp1;
808 case Ity_I8:
809 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
810 case Ity_I16:
811 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
812 case Ity_I32:
813 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
814 case Ity_I64:
815 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
816 case Ity_V128:
817 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
818 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
819 return tmp1;
820 case Ity_I128:
821 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
822 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
823 return tmp1;
824 default:
825 ppIRType(dst_ty);
826 VG_(tool_panic)("mkPCastTo(2)");
827 }
828 }
829
830 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831 /*
832 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833 PCasting to Ity_I1. However, sometimes it is necessary to be more
834 accurate. The insight is that the result is defined if two
835 corresponding bits can be found, one from each argument, so that
836 both bits are defined but are different -- that makes EQ say "No"
837 and NE say "Yes". Hence, we compute an improvement term and DifD
838 it onto the "normal" (UifU) result.
839
840 The result is:
841
842 PCastTo<1> (
843 -- naive version
844 PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
846 `DifD<sz>`
847
848 -- improvement term
849 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
850 )
851
852 where
853 vec contains 0 (defined) bits where the corresponding arg bits
854 are defined but different, and 1 bits otherwise.
855
856 vec = Or<sz>( vxx, // 0 iff bit defined
857 vyy, // 0 iff bit defined
858 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859 )
860
861 If any bit of vec is 0, the result is defined and so the
862 improvement term should produce 0...0, else it should produce
863 1...1.
864
865 Hence require for the improvement term:
866
867 if vec == 1...1 then 1...1 else 0...0
868 ->
869 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871 This was extensively re-analysed and checked on 6 July 05.
872 */
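/* Worked example (4 bits wide, purely illustrative): xx = 1010,
   yy = 0011, vxx = 1100 (top two bits of xx undefined), vyy = 0000.
   Bit 0 of both args is defined and the two bits differ, so the
   comparison outcome is knowable despite xx's undefined bits.  Indeed
   vec = 1100 | 0000 | ~(1010 ^ 0011) = 1100 | 0000 | 0110 = 1110,
   which is not all-ones, so the improvement term PCasts to 0...0 and
   the DifD forces the final result to 'defined'. */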
873 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
874 IRType ty,
875 IRAtom* vxx, IRAtom* vyy,
876 IRAtom* xx, IRAtom* yy )
877 {
878 IRAtom *naive, *vec, *improvement_term;
879 IRAtom *improved, *final_cast, *top;
880 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
881
882 tl_assert(isShadowAtom(mce,vxx));
883 tl_assert(isShadowAtom(mce,vyy));
884 tl_assert(isOriginalAtom(mce,xx));
885 tl_assert(isOriginalAtom(mce,yy));
886 tl_assert(sameKindedAtoms(vxx,xx));
887 tl_assert(sameKindedAtoms(vyy,yy));
888
889 switch (ty) {
890 case Ity_I32:
891 opOR = Iop_Or32;
892 opDIFD = Iop_And32;
893 opUIFU = Iop_Or32;
894 opNOT = Iop_Not32;
895 opXOR = Iop_Xor32;
896 opCMP = Iop_CmpEQ32;
897 top = mkU32(0xFFFFFFFF);
898 break;
899 case Ity_I64:
900 opOR = Iop_Or64;
901 opDIFD = Iop_And64;
902 opUIFU = Iop_Or64;
903 opNOT = Iop_Not64;
904 opXOR = Iop_Xor64;
905 opCMP = Iop_CmpEQ64;
906 top = mkU64(0xFFFFFFFFFFFFFFFFULL);
907 break;
908 default:
909 VG_(tool_panic)("expensiveCmpEQorNE");
910 }
911
912 naive
913 = mkPCastTo(mce,ty,
914 assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
915
916 vec
917 = assignNew(
918 'V', mce,ty,
919 binop( opOR,
920 assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
921 assignNew(
922 'V', mce,ty,
923 unop( opNOT,
924 assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
925
926 improvement_term
927 = mkPCastTo( mce,ty,
928 assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
929
930 improved
931 = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
932
933 final_cast
934 = mkPCastTo( mce, Ity_I1, improved );
935
936 return final_cast;
937 }
938
939
940 /* --------- Semi-accurate interpretation of CmpORD. --------- */
941
942 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
943
944 CmpORD32S(x,y) = 1<<3 if x <s y
945 = 1<<2 if x >s y
946 = 1<<1 if x == y
947
948 and similarly the unsigned variant. The default interpretation is:
949
950 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
951 & (7<<1)
952
953 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
954 are zero and therefore defined (viz, zero).
955
956 Also deal with a special case better:
957
958 CmpORD32S(x,0)
959
960 Here, bit 3 (LT) of the result is a copy of the top bit of x and
961 will be defined even if the rest of x isn't. In which case we do:
962
963 CmpORD32S#(x,x#,0,{impliedly 0}#)
964 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
965 | (x# >>u 31) << 3 -- LT# = x#[31]
966
967 Analogous handling for CmpORD64{S,U}.
968 */
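/* For instance (illustrative values): CmpORD32S(x,0) with
   x# = 0x0000FFFF, i.e. only the low half of x is undefined:
     PCast(x#) & (3<<1)  =  0xFFFFFFFF & 0b110  =  0b110
     (x# >>u 31) << 3    =  0 << 3              =  0
   so the result's V bits are 0b110: the GT and EQ bits are undefined
   (they depend on the undefined low half of x) but the LT bit is
   defined, since it is just the sign bit of x, which is defined. */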
969 static Bool isZeroU32 ( IRAtom* e )
970 {
971 return
972 toBool( e->tag == Iex_Const
973 && e->Iex.Const.con->tag == Ico_U32
974 && e->Iex.Const.con->Ico.U32 == 0 );
975 }
976
977 static Bool isZeroU64 ( IRAtom* e )
978 {
979 return
980 toBool( e->tag == Iex_Const
981 && e->Iex.Const.con->tag == Ico_U64
982 && e->Iex.Const.con->Ico.U64 == 0 );
983 }
984
985 static IRAtom* doCmpORD ( MCEnv* mce,
986 IROp cmp_op,
987 IRAtom* xxhash, IRAtom* yyhash,
988 IRAtom* xx, IRAtom* yy )
989 {
990 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
991 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
992 IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
993 IROp opAND = m64 ? Iop_And64 : Iop_And32;
994 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
995 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
996 IRType ty = m64 ? Ity_I64 : Ity_I32;
997 Int width = m64 ? 64 : 32;
998
999 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1000
1001 IRAtom* threeLeft1 = NULL;
1002 IRAtom* sevenLeft1 = NULL;
1003
1004 tl_assert(isShadowAtom(mce,xxhash));
1005 tl_assert(isShadowAtom(mce,yyhash));
1006 tl_assert(isOriginalAtom(mce,xx));
1007 tl_assert(isOriginalAtom(mce,yy));
1008 tl_assert(sameKindedAtoms(xxhash,xx));
1009 tl_assert(sameKindedAtoms(yyhash,yy));
1010 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1011 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1012
1013 if (0) {
1014 ppIROp(cmp_op); VG_(printf)(" ");
1015 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1016 }
1017
1018 if (syned && isZero(yy)) {
1019 /* fancy interpretation */
1020 /* if yy is zero, then it must be fully defined (zero#). */
1021 tl_assert(isZero(yyhash));
1022 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1023 return
1024 binop(
1025 opOR,
1026 assignNew(
1027 'V', mce,ty,
1028 binop(
1029 opAND,
1030 mkPCastTo(mce,ty, xxhash),
1031 threeLeft1
1032 )),
1033 assignNew(
1034 'V', mce,ty,
1035 binop(
1036 opSHL,
1037 assignNew(
1038 'V', mce,ty,
1039 binop(opSHR, xxhash, mkU8(width-1))),
1040 mkU8(3)
1041 ))
1042 );
1043 } else {
1044 /* standard interpretation */
1045 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1046 return
1047 binop(
1048 opAND,
1049 mkPCastTo( mce,ty,
1050 mkUifU(mce,ty, xxhash,yyhash)),
1051 sevenLeft1
1052 );
1053 }
1054 }
1055
1056
1057 /*------------------------------------------------------------*/
1058 /*--- Emit a test and complaint if something is undefined. ---*/
1059 /*------------------------------------------------------------*/
1060
1061 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1062
1063
1064 /* Set the annotations on a dirty helper to indicate that the stack
1065 pointer and instruction pointer might be read. This is the
1066 behaviour of all 'emit-a-complaint' style functions we might
1067 call. */
1068
1069 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1070 di->nFxState = 2;
1071 di->fxState[0].fx = Ifx_Read;
1072 di->fxState[0].offset = mce->layout->offset_SP;
1073 di->fxState[0].size = mce->layout->sizeof_SP;
1074 di->fxState[0].nRepeats = 0;
1075 di->fxState[0].repeatLen = 0;
1076 di->fxState[1].fx = Ifx_Read;
1077 di->fxState[1].offset = mce->layout->offset_IP;
1078 di->fxState[1].size = mce->layout->sizeof_IP;
1079 di->fxState[1].nRepeats = 0;
1080 di->fxState[1].repeatLen = 0;
1081 }
1082
1083
1084 /* Check the supplied **original** atom for undefinedness, and emit a
1085 complaint if so. Once that happens, mark it as defined. This is
1086 possible because the atom is either a tmp or literal. If it's a
1087 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1088 be defined. In fact as mentioned above, we will have to allocate a
1089 new tmp to carry the new 'defined' shadow value, and update the
1090 original->tmp mapping accordingly; we cannot simply assign a new
1091 value to an existing shadow tmp as this breaks SSAness -- resulting
1092 in the post-instrumentation sanity checker spluttering in disapproval.
1093 */
1094 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1095 {
1096 IRAtom* vatom;
1097 IRType ty;
1098 Int sz;
1099 IRDirty* di;
1100 IRAtom* cond;
1101 IRAtom* origin;
1102 void* fn;
1103 HChar* nm;
1104 IRExpr** args;
1105 Int nargs;
1106
1107 // Don't do V bit tests if we're not reporting undefined value errors.
1108 if (MC_(clo_mc_level) == 1)
1109 return;
1110
1111 /* Since the original expression is atomic, there's no duplicated
1112 work generated by making multiple V-expressions for it. So we
1113 don't really care about the possibility that someone else may
1114 also create a V-interpretation for it. */
1115 tl_assert(isOriginalAtom(mce, atom));
1116 vatom = expr2vbits( mce, atom );
1117 tl_assert(isShadowAtom(mce, vatom));
1118 tl_assert(sameKindedAtoms(atom, vatom));
1119
1120 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1121
1122 /* sz is only used for constructing the error message */
1123 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1124
1125 cond = mkPCastTo( mce, Ity_I1, vatom );
1126 /* cond will be 0 if all defined, and 1 if any not defined. */
1127
1128 /* Get the origin info for the value we are about to check. At
1129 least, if we are doing origin tracking. If not, use a dummy
1130 zero origin. */
1131 if (MC_(clo_mc_level) == 3) {
1132 origin = schemeE( mce, atom );
1133 if (mce->hWordTy == Ity_I64) {
1134 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1135 }
1136 } else {
1137 origin = NULL;
1138 }
1139
1140 fn = NULL;
1141 nm = NULL;
1142 args = NULL;
1143 nargs = -1;
1144
1145 switch (sz) {
1146 case 0:
1147 if (origin) {
1148 fn = &MC_(helperc_value_check0_fail_w_o);
1149 nm = "MC_(helperc_value_check0_fail_w_o)";
1150 args = mkIRExprVec_1(origin);
1151 nargs = 1;
1152 } else {
1153 fn = &MC_(helperc_value_check0_fail_no_o);
1154 nm = "MC_(helperc_value_check0_fail_no_o)";
1155 args = mkIRExprVec_0();
1156 nargs = 0;
1157 }
1158 break;
1159 case 1:
1160 if (origin) {
1161 fn = &MC_(helperc_value_check1_fail_w_o);
1162 nm = "MC_(helperc_value_check1_fail_w_o)";
1163 args = mkIRExprVec_1(origin);
1164 nargs = 1;
1165 } else {
1166 fn = &MC_(helperc_value_check1_fail_no_o);
1167 nm = "MC_(helperc_value_check1_fail_no_o)";
1168 args = mkIRExprVec_0();
1169 nargs = 0;
1170 }
1171 break;
1172 case 4:
1173 if (origin) {
1174 fn = &MC_(helperc_value_check4_fail_w_o);
1175 nm = "MC_(helperc_value_check4_fail_w_o)";
1176 args = mkIRExprVec_1(origin);
1177 nargs = 1;
1178 } else {
1179 fn = &MC_(helperc_value_check4_fail_no_o);
1180 nm = "MC_(helperc_value_check4_fail_no_o)";
1181 args = mkIRExprVec_0();
1182 nargs = 0;
1183 }
1184 break;
1185 case 8:
1186 if (origin) {
1187 fn = &MC_(helperc_value_check8_fail_w_o);
1188 nm = "MC_(helperc_value_check8_fail_w_o)";
1189 args = mkIRExprVec_1(origin);
1190 nargs = 1;
1191 } else {
1192 fn = &MC_(helperc_value_check8_fail_no_o);
1193 nm = "MC_(helperc_value_check8_fail_no_o)";
1194 args = mkIRExprVec_0();
1195 nargs = 0;
1196 }
1197 break;
1198 case 2:
1199 case 16:
1200 if (origin) {
1201 fn = &MC_(helperc_value_checkN_fail_w_o);
1202 nm = "MC_(helperc_value_checkN_fail_w_o)";
1203 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1204 nargs = 2;
1205 } else {
1206 fn = &MC_(helperc_value_checkN_fail_no_o);
1207 nm = "MC_(helperc_value_checkN_fail_no_o)";
1208 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1209 nargs = 1;
1210 }
1211 break;
1212 default:
1213 VG_(tool_panic)("unexpected szB");
1214 }
1215
1216 tl_assert(fn);
1217 tl_assert(nm);
1218 tl_assert(args);
1219 tl_assert(nargs >= 0 && nargs <= 2);
1220 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1221 || (MC_(clo_mc_level) == 2 && origin == NULL) );
1222
1223 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1224 VG_(fnptr_to_fnentry)( fn ), args );
1225 di->guard = cond;
1226
1227 /* If the complaint is to be issued under a guard condition, AND that
1228 guard condition into the dirty call's own guard. */
1229 if (guard) {
1230 IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1231 IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1232 IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1233
1234 di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1235 }
1236
1237 setHelperAnns( mce, di );
1238 stmt( 'V', mce, IRStmt_Dirty(di));
1239
1240 /* Set the shadow tmp to be defined. First, update the
1241 orig->shadow tmp mapping to reflect the fact that this shadow is
1242 getting a new value. */
1243 tl_assert(isIRAtom(vatom));
1244 /* sameKindedAtoms ... */
1245 if (vatom->tag == Iex_RdTmp) {
1246 tl_assert(atom->tag == Iex_RdTmp);
1247 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1248 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1249 definedOfType(ty));
1250 }
1251 }
1252
1253
1254 /*------------------------------------------------------------*/
1255 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1256 /*------------------------------------------------------------*/
1257
1258 /* Examine the always-defined sections declared in layout to see if
1259 the (offset,size) section is within one. Note, it is an error to
1260 partially fall into such a region: (offset,size) should either be
1261 completely in such a region or completely not-in such a region.
1262 */
1263 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1264 {
1265 Int minoffD, maxoffD, i;
1266 Int minoff = offset;
1267 Int maxoff = minoff + size - 1;
1268 tl_assert((minoff & ~0xFFFF) == 0);
1269 tl_assert((maxoff & ~0xFFFF) == 0);
1270
1271 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1272 minoffD = mce->layout->alwaysDefd[i].offset;
1273 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1274 tl_assert((minoffD & ~0xFFFF) == 0);
1275 tl_assert((maxoffD & ~0xFFFF) == 0);
1276
1277 if (maxoff < minoffD || maxoffD < minoff)
1278 continue; /* no overlap */
1279 if (minoff >= minoffD && maxoff <= maxoffD)
1280 return True; /* completely contained in an always-defd section */
1281
1282 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1283 }
1284 return False; /* could not find any containing section */
1285 }
1286
1287
1288 /* Generate into bb suitable actions to shadow this Put. If the state
1289 slice is marked 'always defined', do nothing. Otherwise, write the
1290 supplied V bits to the shadow state. We can pass in either an
1291 original atom or a V-atom, but not both. In the former case the
1292 relevant V-bits are then generated from the original.
1293 We assume here that the definedness of GUARD has already been checked.
1294 */
1295 static
1296 void do_shadow_PUT ( MCEnv* mce, Int offset,
1297 IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1298 {
1299 IRType ty;
1300
1301 // Don't do shadow PUTs if we're not doing undefined value checking.
1302 // Their absence lets Vex's optimiser remove all the shadow computation
1303 // that they depend on, which includes GETs of the shadow registers.
1304 if (MC_(clo_mc_level) == 1)
1305 return;
1306
1307 if (atom) {
1308 tl_assert(!vatom);
1309 tl_assert(isOriginalAtom(mce, atom));
1310 vatom = expr2vbits( mce, atom );
1311 } else {
1312 tl_assert(vatom);
1313 tl_assert(isShadowAtom(mce, vatom));
1314 }
1315
1316 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1317 tl_assert(ty != Ity_I1);
1318 tl_assert(ty != Ity_I128);
1319 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1320 /* later: no ... */
1321 /* emit code to emit a complaint if any of the vbits are 1. */
1322 /* complainIfUndefined(mce, atom); */
1323 } else {
1324 /* Do a plain shadow Put. */
1325 if (guard) {
1326 /* If the guard expression evaluates to false we simply Put the value
1327 that is already stored in the guest state slot */
1328 IRAtom *cond, *iffalse;
1329
1330 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
1331 iffalse = assignNew('V', mce, ty,
1332 IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1333 vatom = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
1334 }
1335 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1336 }
1337 }
1338
1339
1340 /* Generate into bb suitable actions to shadow this PutI. If the state
1341 slice is marked 'always defined', do nothing; otherwise write the V
1342 bits for puti->data to the corresponding shadow state array. */
1343 static
1344 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1345 {
1346 IRAtom* vatom;
1347 IRType ty, tyS;
1348 Int arrSize;
1349 IRRegArray* descr = puti->descr;
1350 IRAtom* ix = puti->ix;
1351 Int bias = puti->bias;
1352 IRAtom* atom = puti->data;
1353
1354 // Don't do shadow PUTIs if we're not doing undefined value checking.
1355 // Their absence lets Vex's optimiser remove all the shadow computation
1356 // that they depend on, which includes GETIs of the shadow registers.
1357 if (MC_(clo_mc_level) == 1)
1358 return;
1359
1360 tl_assert(isOriginalAtom(mce,atom));
1361 vatom = expr2vbits( mce, atom );
1362 tl_assert(sameKindedAtoms(atom, vatom));
1363 ty = descr->elemTy;
1364 tyS = shadowTypeV(ty);
1365 arrSize = descr->nElems * sizeofIRType(ty);
1366 tl_assert(ty != Ity_I1);
1367 tl_assert(isOriginalAtom(mce,ix));
1368 complainIfUndefined(mce, ix, NULL);
1369 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1370 /* later: no ... */
1371 /* emit code to emit a complaint if any of the vbits are 1. */
1372 /* complainIfUndefined(mce, atom); */
1373 } else {
1374 /* Do a cloned version of the Put that refers to the shadow
1375 area. */
1376 IRRegArray* new_descr
1377 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1378 tyS, descr->nElems);
1379 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1380 }
1381 }
1382
1383
1384 /* Return an expression which contains the V bits corresponding to the
1385 given GET (passed in in pieces).
1386 */
1387 static
1388 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1389 {
1390 IRType tyS = shadowTypeV(ty);
1391 tl_assert(ty != Ity_I1);
1392 tl_assert(ty != Ity_I128);
1393 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1394 /* Always defined, return all zeroes of the relevant type */
1395 return definedOfType(tyS);
1396 } else {
1397 /* return a cloned version of the Get that refers to the shadow
1398 area. */
1399 /* FIXME: this isn't an atom! */
1400 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1401 }
1402 }
1403
1404
1405 /* Return an expression which contains the V bits corresponding to the
1406 given GETI (passed in in pieces).
1407 */
1408 static
1409 IRExpr* shadow_GETI ( MCEnv* mce,
1410 IRRegArray* descr, IRAtom* ix, Int bias )
1411 {
1412 IRType ty = descr->elemTy;
1413 IRType tyS = shadowTypeV(ty);
1414 Int arrSize = descr->nElems * sizeofIRType(ty);
1415 tl_assert(ty != Ity_I1);
1416 tl_assert(isOriginalAtom(mce,ix));
1417 complainIfUndefined(mce, ix, NULL);
1418 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1419 /* Always defined, return all zeroes of the relevant type */
1420 return definedOfType(tyS);
1421 } else {
1422 /* return a cloned version of the Get that refers to the shadow
1423 area. */
1424 IRRegArray* new_descr
1425 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1426 tyS, descr->nElems);
1427 return IRExpr_GetI( new_descr, ix, bias );
1428 }
1429 }
1430
1431
1432 /*------------------------------------------------------------*/
1433 /*--- Generating approximations for unknown operations, ---*/
1434 /*--- using lazy-propagate semantics ---*/
1435 /*------------------------------------------------------------*/
1436
1437 /* Lazy propagation of undefinedness from two values, resulting in the
1438 specified shadow type.
1439 */
1440 static
1441 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1442 {
1443 IRAtom* at;
1444 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1445 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1446 tl_assert(isShadowAtom(mce,va1));
1447 tl_assert(isShadowAtom(mce,va2));
1448
1449 /* The general case is inefficient because PCast is an expensive
1450 operation. Here are some special cases which use PCast only
1451 once rather than twice. */
1452
1453 /* I64 x I64 -> I64 */
1454 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1455 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1456 at = mkUifU(mce, Ity_I64, va1, va2);
1457 at = mkPCastTo(mce, Ity_I64, at);
1458 return at;
1459 }
1460
1461 /* I64 x I64 -> I32 */
1462 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1463 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1464 at = mkUifU(mce, Ity_I64, va1, va2);
1465 at = mkPCastTo(mce, Ity_I32, at);
1466 return at;
1467 }
1468
1469 if (0) {
1470 VG_(printf)("mkLazy2 ");
1471 ppIRType(t1);
1472 VG_(printf)("_");
1473 ppIRType(t2);
1474 VG_(printf)("_");
1475 ppIRType(finalVty);
1476 VG_(printf)("\n");
1477 }
1478
1479 /* General case: force everything via 32-bit intermediaries. */
1480 at = mkPCastTo(mce, Ity_I32, va1);
1481 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1482 at = mkPCastTo(mce, finalVty, at);
1483 return at;
1484 }
1485
1486
1487 /* 3-arg version of the above. */
1488 static
1489 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1490 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1491 {
1492 IRAtom* at;
1493 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1494 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1495 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1496 tl_assert(isShadowAtom(mce,va1));
1497 tl_assert(isShadowAtom(mce,va2));
1498 tl_assert(isShadowAtom(mce,va3));
1499
1500 /* The general case is inefficient because PCast is an expensive
1501 operation. Here are some special cases which use PCast only
1502 twice rather than three times. */
1503
1504 /* I32 x I64 x I64 -> I64 */
1505 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1506 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1507 && finalVty == Ity_I64) {
1508 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1509 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1510 mode indication which is fully defined, this should get
1511 folded out later. */
1512 at = mkPCastTo(mce, Ity_I64, va1);
1513 /* Now fold in 2nd and 3rd args. */
1514 at = mkUifU(mce, Ity_I64, at, va2);
1515 at = mkUifU(mce, Ity_I64, at, va3);
1516 /* and PCast once again. */
1517 at = mkPCastTo(mce, Ity_I64, at);
1518 return at;
1519 }
1520
1521 /* I32 x I64 x I64 -> I32 */
1522 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1523 && finalVty == Ity_I32) {
1524 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1525 at = mkPCastTo(mce, Ity_I64, va1);
1526 at = mkUifU(mce, Ity_I64, at, va2);
1527 at = mkUifU(mce, Ity_I64, at, va3);
1528 at = mkPCastTo(mce, Ity_I32, at);
1529 return at;
1530 }
1531
1532 /* I32 x I32 x I32 -> I32 */
1533 /* 32-bit FP idiom, as (eg) happens on ARM */
1534 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1535 && finalVty == Ity_I32) {
1536 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1537 at = va1;
1538 at = mkUifU(mce, Ity_I32, at, va2);
1539 at = mkUifU(mce, Ity_I32, at, va3);
1540 at = mkPCastTo(mce, Ity_I32, at);
1541 return at;
1542 }
1543
1544 /* I32 x I128 x I128 -> I128 */
1545 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1546 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1547 && finalVty == Ity_I128) {
1548 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1549 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1550 mode indication which is fully defined, this should get
1551 folded out later. */
1552 at = mkPCastTo(mce, Ity_I128, va1);
1553 /* Now fold in 2nd and 3rd args. */
1554 at = mkUifU(mce, Ity_I128, at, va2);
1555 at = mkUifU(mce, Ity_I128, at, va3);
1556 /* and PCast once again. */
1557 at = mkPCastTo(mce, Ity_I128, at);
1558 return at;
1559 }
1560 if (1) {
1561 VG_(printf)("mkLazy3: ");
1562 ppIRType(t1);
1563 VG_(printf)(" x ");
1564 ppIRType(t2);
1565 VG_(printf)(" x ");
1566 ppIRType(t3);
1567 VG_(printf)(" -> ");
1568 ppIRType(finalVty);
1569 VG_(printf)("\n");
1570 }
1571
1572 tl_assert(0);
1573 /* General case: force everything via 32-bit intermediaries. */
1574 /*
1575 at = mkPCastTo(mce, Ity_I32, va1);
1576 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1577 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1578 at = mkPCastTo(mce, finalVty, at);
1579 return at;
1580 */
1581 }
1582
1583
1584 /* 4-arg version of the above. */
1585 static
1586 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1587 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1588 {
1589 IRAtom* at;
1590 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1591 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1592 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1593 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1594 tl_assert(isShadowAtom(mce,va1));
1595 tl_assert(isShadowAtom(mce,va2));
1596 tl_assert(isShadowAtom(mce,va3));
1597 tl_assert(isShadowAtom(mce,va4));
1598
1599    /* The general case is inefficient because PCast is an expensive
1600       operation.  Here are some special cases which need fewer
1601       PCasts than the general scheme would. */
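   /* For example, the fused multiply-add ops Iop_MAddF64/Iop_MSubF64
      -- I32(rm) x F64 x F64 x F64 -> F64, dispatched to
      mkLazy4(mce, Ity_I64, ...) by expr2vbits_Qop below -- hit the
      first special case. */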
1602
1603 /* I32 x I64 x I64 x I64 -> I64 */
1604 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1605 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1606 && finalVty == Ity_I64) {
1607 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1608 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1609 mode indication which is fully defined, this should get
1610 folded out later. */
1611 at = mkPCastTo(mce, Ity_I64, va1);
1612 /* Now fold in 2nd, 3rd, 4th args. */
1613 at = mkUifU(mce, Ity_I64, at, va2);
1614 at = mkUifU(mce, Ity_I64, at, va3);
1615 at = mkUifU(mce, Ity_I64, at, va4);
1616 /* and PCast once again. */
1617 at = mkPCastTo(mce, Ity_I64, at);
1618 return at;
1619 }
1620 /* I32 x I32 x I32 x I32 -> I32 */
1621 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1622 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1623 && finalVty == Ity_I32) {
1624 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1625 at = va1;
1626 /* Now fold in 2nd, 3rd, 4th args. */
1627 at = mkUifU(mce, Ity_I32, at, va2);
1628 at = mkUifU(mce, Ity_I32, at, va3);
1629 at = mkUifU(mce, Ity_I32, at, va4);
1630 at = mkPCastTo(mce, Ity_I32, at);
1631 return at;
1632 }
1633
1634 if (1) {
1635 VG_(printf)("mkLazy4: ");
1636 ppIRType(t1);
1637 VG_(printf)(" x ");
1638 ppIRType(t2);
1639 VG_(printf)(" x ");
1640 ppIRType(t3);
1641 VG_(printf)(" x ");
1642 ppIRType(t4);
1643 VG_(printf)(" -> ");
1644 ppIRType(finalVty);
1645 VG_(printf)("\n");
1646 }
1647
1648 tl_assert(0);
1649 }
1650
1651
1652 /* Do the lazy propagation game from a null-terminated vector of
1653    atoms.  These are presumably the arguments to a helper call, so the
1654 IRCallee info is also supplied in order that we can know which
1655 arguments should be ignored (via the .mcx_mask field).
1656 */
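/* For instance, a callee with .mcx_mask == 0x5 has bits 0 and 2 set,
   so args 0 and 2 are ignored and only the shadows of the remaining
   args are merged into the result. */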
1657 static
1658 IRAtom* mkLazyN ( MCEnv* mce,
1659 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1660 {
1661 Int i;
1662 IRAtom* here;
1663 IRAtom* curr;
1664 IRType mergeTy;
1665 Bool mergeTy64 = True;
1666
1667 /* Decide on the type of the merge intermediary. If all relevant
1668 args are I64, then it's I64. In all other circumstances, use
1669 I32. */
1670 for (i = 0; exprvec[i]; i++) {
1671 tl_assert(i < 32);
1672 tl_assert(isOriginalAtom(mce, exprvec[i]));
1673 if (cee->mcx_mask & (1<<i))
1674 continue;
1675 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1676 mergeTy64 = False;
1677 }
1678
1679 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1680 curr = definedOfType(mergeTy);
1681
1682 for (i = 0; exprvec[i]; i++) {
1683 tl_assert(i < 32);
1684 tl_assert(isOriginalAtom(mce, exprvec[i]));
1685 /* Only take notice of this arg if the callee's mc-exclusion
1686 mask does not say it is to be excluded. */
1687 if (cee->mcx_mask & (1<<i)) {
1688 /* the arg is to be excluded from definedness checking. Do
1689 nothing. */
1690 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1691 } else {
1692 /* calculate the arg's definedness, and pessimistically merge
1693 it in. */
1694 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1695 curr = mergeTy64
1696 ? mkUifU64(mce, here, curr)
1697 : mkUifU32(mce, here, curr);
1698 }
1699 }
1700 return mkPCastTo(mce, finalVtype, curr );
1701 }
1702
1703
1704 /*------------------------------------------------------------*/
1705 /*--- Generating expensive sequences for exact carry-chain ---*/
1706 /*--- propagation in add/sub and related operations. ---*/
1707 /*------------------------------------------------------------*/
1708
1709 static
1710 IRAtom* expensiveAddSub ( MCEnv* mce,
1711 Bool add,
1712 IRType ty,
1713 IRAtom* qaa, IRAtom* qbb,
1714 IRAtom* aa, IRAtom* bb )
1715 {
1716 IRAtom *a_min, *b_min, *a_max, *b_max;
1717 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
1718
1719 tl_assert(isShadowAtom(mce,qaa));
1720 tl_assert(isShadowAtom(mce,qbb));
1721 tl_assert(isOriginalAtom(mce,aa));
1722 tl_assert(isOriginalAtom(mce,bb));
1723 tl_assert(sameKindedAtoms(qaa,aa));
1724 tl_assert(sameKindedAtoms(qbb,bb));
1725
1726 switch (ty) {
1727 case Ity_I32:
1728 opAND = Iop_And32;
1729 opOR = Iop_Or32;
1730 opXOR = Iop_Xor32;
1731 opNOT = Iop_Not32;
1732 opADD = Iop_Add32;
1733 opSUB = Iop_Sub32;
1734 break;
1735 case Ity_I64:
1736 opAND = Iop_And64;
1737 opOR = Iop_Or64;
1738 opXOR = Iop_Xor64;
1739 opNOT = Iop_Not64;
1740 opADD = Iop_Add64;
1741 opSUB = Iop_Sub64;
1742 break;
1743 default:
1744 VG_(tool_panic)("expensiveAddSub");
1745 }
1746
1747 // a_min = aa & ~qaa
1748 a_min = assignNew('V', mce,ty,
1749 binop(opAND, aa,
1750 assignNew('V', mce,ty, unop(opNOT, qaa))));
1751
1752 // b_min = bb & ~qbb
1753 b_min = assignNew('V', mce,ty,
1754 binop(opAND, bb,
1755 assignNew('V', mce,ty, unop(opNOT, qbb))));
1756
1757 // a_max = aa | qaa
1758 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1759
1760 // b_max = bb | qbb
1761 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1762
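   /* Worked example for the 'add' case, using 8-bit values for brevity
      (this helper is only instantiated at 32 and 64 bits): let
      aa = 0x01 with qaa = 0x01 (bit 0 undefined), and bb = 0x01 fully
      defined (qbb = 0x00).  Then a_min = 0x00, a_max = 0x01 and
      b_min = b_max = 0x01, so (a_min+b_min) ^ (a_max+b_max)
      = 0x01 ^ 0x02 = 0x03.  OR-ing in qaa|qbb still gives 0x03: bit 0
      is undefined because the input bit is, and bit 1 is undefined
      because the carry out of bit 0 is uncertain. */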
1763 if (add) {
1764 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1765 return
1766 assignNew('V', mce,ty,
1767 binop( opOR,
1768 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1769 assignNew('V', mce,ty,
1770 binop( opXOR,
1771 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1772 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1773 )
1774 )
1775 )
1776 );
1777 } else {
1778       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1779 return
1780 assignNew('V', mce,ty,
1781 binop( opOR,
1782 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1783 assignNew('V', mce,ty,
1784 binop( opXOR,
1785 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1786 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1787 )
1788 )
1789 )
1790 );
1791 }
1792
1793 }
1794
1795
1796 /*------------------------------------------------------------*/
1797 /*--- Scalar shifts. ---*/
1798 /*------------------------------------------------------------*/
1799
1800 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1801 idea is to shift the definedness bits by the original shift amount.
1802 This introduces 0s ("defined") in new positions for left shifts and
1803 unsigned right shifts, and copies the top definedness bit for
1804 signed right shifts. So, conveniently, applying the original shift
1805 operator to the definedness bits for the left arg is exactly the
1806 right thing to do:
1807
1808 (qaa << bb)
1809
1810 However if the shift amount is undefined then the whole result
1811 is undefined. Hence need:
1812
1813 (qaa << bb) `UifU` PCast(qbb)
1814
1815    If the shift amount bb is a literal then qbb will say 'all defined'
1816 and the UifU and PCast will get folded out by post-instrumentation
1817 optimisation.
1818 */
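/* Concretely, for Iop_Shl32 the shadow computed below is
   UifU32( Shl32(qaa, bb), PCast-to-I32(qbb) ), where bb and qbb have
   type I8. */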
1819 static IRAtom* scalarShift ( MCEnv* mce,
1820 IRType ty,
1821 IROp original_op,
1822 IRAtom* qaa, IRAtom* qbb,
1823 IRAtom* aa, IRAtom* bb )
1824 {
1825 tl_assert(isShadowAtom(mce,qaa));
1826 tl_assert(isShadowAtom(mce,qbb));
1827 tl_assert(isOriginalAtom(mce,aa));
1828 tl_assert(isOriginalAtom(mce,bb));
1829 tl_assert(sameKindedAtoms(qaa,aa));
1830 tl_assert(sameKindedAtoms(qbb,bb));
1831 return
1832 assignNew(
1833 'V', mce, ty,
1834 mkUifU( mce, ty,
1835 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1836 mkPCastTo(mce, ty, qbb)
1837 )
1838 );
1839 }
1840
1841
1842 /*------------------------------------------------------------*/
1843 /*--- Helpers for dealing with vector primops. ---*/
1844 /*------------------------------------------------------------*/
1845
1846 /* Vector pessimisation -- pessimise within each lane individually. */
1847
1848 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1849 {
1850 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1851 }
1852
1853 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1854 {
1855 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1856 }
1857
1858 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1859 {
1860 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1861 }
1862
1863 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1864 {
1865 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1866 }
1867
1868 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1869 {
1870 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1871 }
1872
1873 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
1874 {
1875 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
1876 }
1877
1878 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1879 {
1880 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1881 }
1882
1883 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1884 {
1885 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1886 }
1887
1888 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1889 {
1890 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1891 }
1892
1893 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1894 {
1895 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1896 }
1897
1898 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1899 {
1900 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1901 }
1902
1903
1904 /* Here's a simple scheme capable of handling ops derived from SSE1
1905    code while only generating ops that can be efficiently
1906 implemented in SSE1. */
1907
1908 /* All-lanes versions are straightforward:
1909
1910 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1911
1912    unary32Fx4(x)   ==> PCast32x4(x#)
1913
1914 Lowest-lane-only versions are more complex:
1915
1916 binary32F0x4(x,y) ==> SetV128lo32(
1917 x#,
1918 PCast32(V128to32(UifUV128(x#,y#)))
1919 )
1920
1921 This is perhaps not so obvious. In particular, it's faster to
1922 do a V128-bit UifU and then take the bottom 32 bits than the more
1923 obvious scheme of taking the bottom 32 bits of each operand
1924 and doing a 32-bit UifU. Basically since UifU is fast and
1925 chopping lanes off vector values is slow.
1926
1927 Finally:
1928
1929 unary32F0x4(x) ==> SetV128lo32(
1930 x#,
1931 PCast32(V128to32(x#))
1932 )
1933
1934 Where:
1935
1936 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1937 PCast32x4(v#) = CmpNEZ32x4(v#)
1938 */
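/* For instance, under this scheme Iop_Add32Fx4 (handled in
   expr2vbits_Binop below) becomes

      CmpNEZ32x4( UifUV128(x#,y#) )

   while its lowest-lane-only counterpart Iop_Add32F0x4 becomes

      SetV128lo32( x#, PCast32( V128to32( UifUV128(x#,y#) ) ) )
*/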
1939
1940 static
1941 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1942 {
1943 IRAtom* at;
1944 tl_assert(isShadowAtom(mce, vatomX));
1945 tl_assert(isShadowAtom(mce, vatomY));
1946 at = mkUifUV128(mce, vatomX, vatomY);
1947 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1948 return at;
1949 }
1950
1951 static
1952 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1953 {
1954 IRAtom* at;
1955 tl_assert(isShadowAtom(mce, vatomX));
1956 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1957 return at;
1958 }
1959
1960 static
1961 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1962 {
1963 IRAtom* at;
1964 tl_assert(isShadowAtom(mce, vatomX));
1965 tl_assert(isShadowAtom(mce, vatomY));
1966 at = mkUifUV128(mce, vatomX, vatomY);
1967 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1968 at = mkPCastTo(mce, Ity_I32, at);
1969 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1970 return at;
1971 }
1972
1973 static
1974 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1975 {
1976 IRAtom* at;
1977 tl_assert(isShadowAtom(mce, vatomX));
1978 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1979 at = mkPCastTo(mce, Ity_I32, at);
1980 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1981 return at;
1982 }
1983
1984 /* --- ... and ... 64Fx2 versions of the same ... --- */
1985
1986 static
1987 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1988 {
1989 IRAtom* at;
1990 tl_assert(isShadowAtom(mce, vatomX));
1991 tl_assert(isShadowAtom(mce, vatomY));
1992 at = mkUifUV128(mce, vatomX, vatomY);
1993 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1994 return at;
1995 }
1996
1997 static
1998 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1999 {
2000 IRAtom* at;
2001 tl_assert(isShadowAtom(mce, vatomX));
2002 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2003 return at;
2004 }
2005
2006 static
2007 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2008 {
2009 IRAtom* at;
2010 tl_assert(isShadowAtom(mce, vatomX));
2011 tl_assert(isShadowAtom(mce, vatomY));
2012 at = mkUifUV128(mce, vatomX, vatomY);
2013 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2014 at = mkPCastTo(mce, Ity_I64, at);
2015 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2016 return at;
2017 }
2018
2019 static
2020 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2021 {
2022 IRAtom* at;
2023 tl_assert(isShadowAtom(mce, vatomX));
2024 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2025 at = mkPCastTo(mce, Ity_I64, at);
2026 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2027 return at;
2028 }
2029
2030 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2031
2032 static
2033 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2034 {
2035 IRAtom* at;
2036 tl_assert(isShadowAtom(mce, vatomX));
2037 tl_assert(isShadowAtom(mce, vatomY));
2038 at = mkUifU64(mce, vatomX, vatomY);
2039 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2040 return at;
2041 }
2042
2043 static
2044 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2045 {
2046 IRAtom* at;
2047 tl_assert(isShadowAtom(mce, vatomX));
2048 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2049 return at;
2050 }
2051
2052 /* --- ... and ... 64Fx4 versions of the same ... --- */
2053
2054 static
2055 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2056 {
2057 IRAtom* at;
2058 tl_assert(isShadowAtom(mce, vatomX));
2059 tl_assert(isShadowAtom(mce, vatomY));
2060 at = mkUifUV256(mce, vatomX, vatomY);
2061 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2062 return at;
2063 }
2064
2065 static
2066 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2067 {
2068 IRAtom* at;
2069 tl_assert(isShadowAtom(mce, vatomX));
2070 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2071 return at;
2072 }
2073
2074 /* --- ... and ... 32Fx8 versions of the same ... --- */
2075
2076 static
2077 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2078 {
2079 IRAtom* at;
2080 tl_assert(isShadowAtom(mce, vatomX));
2081 tl_assert(isShadowAtom(mce, vatomY));
2082 at = mkUifUV256(mce, vatomX, vatomY);
2083 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2084 return at;
2085 }
2086
2087 static
2088 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2089 {
2090 IRAtom* at;
2091 tl_assert(isShadowAtom(mce, vatomX));
2092 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2093 return at;
2094 }
2095
2096 /* --- --- Vector saturated narrowing --- --- */
2097
2098 /* We used to do something very clever here, but on closer inspection
2099 (2011-Jun-15), and in particular bug #279698, it turns out to be
2100 wrong. Part of the problem came from the fact that for a long
2101 time, the IR primops to do with saturated narrowing were
2102 underspecified and managed to confuse multiple cases which needed
2103 to be separate: the op names had a signedness qualifier, but in
2104 fact the source and destination signednesses needed to be specified
2105 independently, so the op names really need two independent
2106 signedness specifiers.
2107
2108 As of 2011-Jun-15 (ish) the underspecification was sorted out
2109 properly. The incorrect instrumentation remained, though. That
2110 has now (2011-Oct-22) been fixed.
2111
2112 What we now do is simple:
2113
2114 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2115 number of lanes, X is the source lane width and signedness, and Y
2116 is the destination lane width and signedness. In all cases the
2117 destination lane width is half the source lane width, so the names
2118 have a bit of redundancy, but are at least easy to read.
2119
2120 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2121 to unsigned 16s.
2122
2123 Let Vanilla(OP) be a function that takes OP, one of these
2124 saturating narrowing ops, and produces the same "shaped" narrowing
2125 op which is not saturating, but merely dumps the most significant
2126 bits. "same shape" means that the lane numbers and widths are the
2127 same as with OP.
2128
2129 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2130 = Iop_NarrowBin32to16x8,
2131 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2132 dumping the top half of each lane.
2133
2134    So, with that in place, the scheme is simple: pessimise each lane
2135    individually and then apply Vanilla(OP) so as to get the result in
2136    the right "shape".  If the original OP is
2137 QNarrowBinXtoYxZ then we produce
2138
2139 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2140
2141 or for the case when OP is unary (Iop_QNarrowUn*)
2142
2143 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2144 */
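/* Concretely, for the binary op Iop_QNarrowBin32Sto16Ux8 this scheme
   produces

      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) )

   and for the unary op Iop_QNarrowUn32Sto16Ux4 it produces

      NarrowUn32to16x4( CmpNEZ32x4(vatom) )

   as implemented by vectorNarrowBinV128 and vectorNarrowUnV128 below. */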
2145 static
2146 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2147 {
2148 switch (qnarrowOp) {
2149 /* Binary: (128, 128) -> 128 */
2150 case Iop_QNarrowBin16Sto8Ux16:
2151 case Iop_QNarrowBin16Sto8Sx16:
2152 case Iop_QNarrowBin16Uto8Ux16:
2153 return Iop_NarrowBin16to8x16;
2154 case Iop_QNarrowBin32Sto16Ux8:
2155 case Iop_QNarrowBin32Sto16Sx8:
2156 case Iop_QNarrowBin32Uto16Ux8:
2157 return Iop_NarrowBin32to16x8;
2158 /* Binary: (64, 64) -> 64 */
2159 case Iop_QNarrowBin32Sto16Sx4:
2160 return Iop_NarrowBin32to16x4;
2161 case Iop_QNarrowBin16Sto8Ux8:
2162 case Iop_QNarrowBin16Sto8Sx8:
2163 return Iop_NarrowBin16to8x8;
2164 /* Unary: 128 -> 64 */
2165 case Iop_QNarrowUn64Uto32Ux2:
2166 case Iop_QNarrowUn64Sto32Sx2:
2167 case Iop_QNarrowUn64Sto32Ux2:
2168 return Iop_NarrowUn64to32x2;
2169 case Iop_QNarrowUn32Uto16Ux4:
2170 case Iop_QNarrowUn32Sto16Sx4:
2171 case Iop_QNarrowUn32Sto16Ux4:
2172 return Iop_NarrowUn32to16x4;
2173 case Iop_QNarrowUn16Uto8Ux8:
2174 case Iop_QNarrowUn16Sto8Sx8:
2175 case Iop_QNarrowUn16Sto8Ux8:
2176 return Iop_NarrowUn16to8x8;
2177 default:
2178 ppIROp(qnarrowOp);
2179          VG_(tool_panic)("vanillaNarrowingOpOfShape");
2180 }
2181 }
2182
2183 static
2184 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2185 IRAtom* vatom1, IRAtom* vatom2)
2186 {
2187 IRAtom *at1, *at2, *at3;
2188 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2189 switch (narrow_op) {
2190 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2191 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2192 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2193 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2194 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2195 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2196 default: VG_(tool_panic)("vectorNarrowBinV128");
2197 }
2198 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2199 tl_assert(isShadowAtom(mce,vatom1));
2200 tl_assert(isShadowAtom(mce,vatom2));
2201 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2202 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2203 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2204 return at3;
2205 }
2206
2207 static
2208 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2209 IRAtom* vatom1, IRAtom* vatom2)
2210 {
2211 IRAtom *at1, *at2, *at3;
2212 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2213 switch (narrow_op) {
2214 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2215 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2216 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2217 default: VG_(tool_panic)("vectorNarrowBin64");
2218 }
2219 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2220 tl_assert(isShadowAtom(mce,vatom1));
2221 tl_assert(isShadowAtom(mce,vatom2));
2222 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2223 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2224 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2225 return at3;
2226 }
2227
2228 static
2229 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2230 IRAtom* vatom1)
2231 {
2232 IRAtom *at1, *at2;
2233 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2234 tl_assert(isShadowAtom(mce,vatom1));
2235 /* For vanilla narrowing (non-saturating), we can just apply
2236 the op directly to the V bits. */
2237 switch (narrow_op) {
2238 case Iop_NarrowUn16to8x8:
2239 case Iop_NarrowUn32to16x4:
2240 case Iop_NarrowUn64to32x2:
2241 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2242 return at1;
2243 default:
2244 break; /* Do Plan B */
2245 }
2246 /* Plan B: for ops that involve a saturation operation on the args,
2247 we must PCast before the vanilla narrow. */
2248 switch (narrow_op) {
2249 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2250 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2251 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2252 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2253 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2254 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2255 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2256 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2257 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2258 default: VG_(tool_panic)("vectorNarrowUnV128");
2259 }
2260 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2261 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2262 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2263 return at2;
2264 }
2265
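/* Instrument a vector widening ("longening") op: apply the widening op
   itself to the V bits -- it only rearranges and sign/zero-extends
   lanes -- and then pessimise each lane at the widened lane size. */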
2266 static
2267 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2268 IRAtom* vatom1)
2269 {
2270 IRAtom *at1, *at2;
2271 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2272 switch (longen_op) {
2273 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2274 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2275 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2276 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2277 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2278 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2279 default: VG_(tool_panic)("vectorWidenI64");
2280 }
2281 tl_assert(isShadowAtom(mce,vatom1));
2282 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2283 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2284 return at2;
2285 }
2286
2287
2288 /* --- --- Vector integer arithmetic --- --- */
2289
2290 /* Simple ... UifU the args and per-lane pessimise the results. */
2291
2292 /* --- V128-bit versions --- */
2293
2294 static
2295 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2296 {
2297 IRAtom* at;
2298 at = mkUifUV128(mce, vatom1, vatom2);
2299 at = mkPCast8x16(mce, at);
2300 return at;
2301 }
2302
2303 static
2304 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2305 {
2306 IRAtom* at;
2307 at = mkUifUV128(mce, vatom1, vatom2);
2308 at = mkPCast16x8(mce, at);
2309 return at;
2310 }
2311
2312 static
2313 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2314 {
2315 IRAtom* at;
2316 at = mkUifUV128(mce, vatom1, vatom2);
2317 at = mkPCast32x4(mce, at);
2318 return at;
2319 }
2320
2321 static
2322 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2323 {
2324 IRAtom* at;
2325 at = mkUifUV128(mce, vatom1, vatom2);
2326 at = mkPCast64x2(mce, at);
2327 return at;
2328 }
2329
2330 /* --- 64-bit versions --- */
2331
2332 static
2333 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2334 {
2335 IRAtom* at;
2336 at = mkUifU64(mce, vatom1, vatom2);
2337 at = mkPCast8x8(mce, at);
2338 return at;
2339 }
2340
2341 static
2342 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2343 {
2344 IRAtom* at;
2345 at = mkUifU64(mce, vatom1, vatom2);
2346 at = mkPCast16x4(mce, at);
2347 return at;
2348 }
2349
2350 static
2351 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2352 {
2353 IRAtom* at;
2354 at = mkUifU64(mce, vatom1, vatom2);
2355 at = mkPCast32x2(mce, at);
2356 return at;
2357 }
2358
2359 static
2360 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2361 {
2362 IRAtom* at;
2363 at = mkUifU64(mce, vatom1, vatom2);
2364 at = mkPCastTo(mce, Ity_I64, at);
2365 return at;
2366 }
2367
2368 /* --- 32-bit versions --- */
2369
2370 static
2371 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2372 {
2373 IRAtom* at;
2374 at = mkUifU32(mce, vatom1, vatom2);
2375 at = mkPCast8x4(mce, at);
2376 return at;
2377 }
2378
2379 static
2380 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2381 {
2382 IRAtom* at;
2383 at = mkUifU32(mce, vatom1, vatom2);
2384 at = mkPCast16x2(mce, at);
2385 return at;
2386 }
2387
2388
2389 /*------------------------------------------------------------*/
2390 /*--- Generate shadow values from all kinds of IRExprs. ---*/
2391 /*------------------------------------------------------------*/
2392
2393 static
2394 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2395 IROp op,
2396 IRAtom* atom1, IRAtom* atom2,
2397 IRAtom* atom3, IRAtom* atom4 )
2398 {
2399 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2400 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2401 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2402 IRAtom* vatom4 = expr2vbits( mce, atom4 );
2403
2404 tl_assert(isOriginalAtom(mce,atom1));
2405 tl_assert(isOriginalAtom(mce,atom2));
2406 tl_assert(isOriginalAtom(mce,atom3));
2407 tl_assert(isOriginalAtom(mce,atom4));
2408 tl_assert(isShadowAtom(mce,vatom1));
2409 tl_assert(isShadowAtom(mce,vatom2));
2410 tl_assert(isShadowAtom(mce,vatom3));
2411 tl_assert(isShadowAtom(mce,vatom4));
2412 tl_assert(sameKindedAtoms(atom1,vatom1));
2413 tl_assert(sameKindedAtoms(atom2,vatom2));
2414 tl_assert(sameKindedAtoms(atom3,vatom3));
2415 tl_assert(sameKindedAtoms(atom4,vatom4));
2416 switch (op) {
2417 case Iop_MAddF64:
2418 case Iop_MAddF64r32:
2419 case Iop_MSubF64:
2420 case Iop_MSubF64r32:
2421 /* I32(rm) x F64 x F64 x F64 -> F64 */
2422 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2423
2424 case Iop_MAddF32:
2425 case Iop_MSubF32:
2426 /* I32(rm) x F32 x F32 x F32 -> F32 */
2427 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2428
2429 /* V256-bit data-steering */
2430 case Iop_64x4toV256:
2431 return assignNew('V', mce, Ity_V256,
2432 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2433
2434 default:
2435 ppIROp(op);
2436 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2437 }
2438 }
2439
2440
2441 static
2442 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2443 IROp op,
2444 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2445 {
2446 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2447 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2448 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2449
2450 tl_assert(isOriginalAtom(mce,atom1));
2451 tl_assert(isOriginalAtom(mce,atom2));
2452 tl_assert(isOriginalAtom(mce,atom3));
2453 tl_assert(isShadowAtom(mce,vatom1));
2454 tl_assert(isShadowAtom(mce,vatom2));
2455 tl_assert(isShadowAtom(mce,vatom3));
2456 tl_assert(sameKindedAtoms(atom1,vatom1));
2457 tl_assert(sameKindedAtoms(atom2,vatom2));
2458 tl_assert(sameKindedAtoms(atom3,vatom3));
2459 switch (op) {
2460 case Iop_AddF128:
2461 case Iop_AddD128:
2462 case Iop_SubF128:
2463 case Iop_SubD128:
2464 case Iop_MulF128:
2465 case Iop_MulD128:
2466 case Iop_DivF128:
2467 case Iop_DivD128:
2468 case Iop_QuantizeD128:
2469 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2470 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2471 case Iop_AddF64:
2472 case Iop_AddD64:
2473 case Iop_AddF64r32:
2474 case Iop_SubF64:
2475 case Iop_SubD64:
2476 case Iop_SubF64r32:
2477 case Iop_MulF64:
2478 case Iop_MulD64:
2479 case Iop_MulF64r32:
2480 case Iop_DivF64:
2481 case Iop_DivD64:
2482 case Iop_DivF64r32:
2483 case Iop_ScaleF64:
2484 case Iop_Yl2xF64:
2485 case Iop_Yl2xp1F64:
2486 case Iop_AtanF64:
2487 case Iop_PRemF64:
2488 case Iop_PRem1F64:
2489 case Iop_QuantizeD64:
2490 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2491 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2492 case Iop_PRemC3210F64:
2493 case Iop_PRem1C3210F64:
2494 /* I32(rm) x F64 x F64 -> I32 */
2495 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2496 case Iop_AddF32:
2497 case Iop_SubF32:
2498 case Iop_MulF32:
2499 case Iop_DivF32:
2500          /* I32(rm) x F32 x F32 -> F32 */
2501 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2502 case Iop_SignificanceRoundD64:
2503 /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
2504 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2505 case Iop_SignificanceRoundD128:
2506 /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
2507 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2508 case Iop_ExtractV128:
2509 complainIfUndefined(mce, atom3, NULL);
2510 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2511 case Iop_Extract64:
2512 complainIfUndefined(mce, atom3, NULL);
2513 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2514 case Iop_SetElem8x8:
2515 case Iop_SetElem16x4:
2516 case Iop_SetElem32x2:
2517 complainIfUndefined(mce, atom2, NULL);
2518 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2519 default:
2520 ppIROp(op);
2521 VG_(tool_panic)("memcheck:expr2vbits_Triop");
2522 }
2523 }
2524
2525
2526 static
2527 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2528 IROp op,
2529 IRAtom* atom1, IRAtom* atom2 )
2530 {
2531 IRType and_or_ty;
2532 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2533 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2534 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2535
2536 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2537 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2538
2539 tl_assert(isOriginalAtom(mce,atom1));
2540 tl_assert(isOriginalAtom(mce,atom2));
2541 tl_assert(isShadowAtom(mce,vatom1));
2542 tl_assert(isShadowAtom(mce,vatom2));
2543 tl_assert(sameKindedAtoms(atom1,vatom1));
2544 tl_assert(sameKindedAtoms(atom2,vatom2));
2545 switch (op) {
2546
2547 /* 32-bit SIMD */
2548
2549 case Iop_Add16x2:
2550 case Iop_HAdd16Ux2:
2551 case Iop_HAdd16Sx2:
2552 case Iop_Sub16x2:
2553 case Iop_HSub16Ux2:
2554 case Iop_HSub16Sx2:
2555 case Iop_QAdd16Sx2:
2556 case Iop_QSub16Sx2:
2557 case Iop_QSub16Ux2:
2558 return binary16Ix2(mce, vatom1, vatom2);
2559
2560 case Iop_Add8x4:
2561 case Iop_HAdd8Ux4:
2562 case Iop_HAdd8Sx4:
2563 case Iop_Sub8x4:
2564 case Iop_HSub8Ux4:
2565 case Iop_HSub8Sx4:
2566 case Iop_QSub8Ux4:
2567 case Iop_QAdd8Ux4:
2568 case Iop_QSub8Sx4:
2569 case Iop_QAdd8Sx4:
2570 return binary8Ix4(mce, vatom1, vatom2);
2571
2572 /* 64-bit SIMD */
2573
2574 case Iop_ShrN8x8:
2575 case Iop_ShrN16x4:
2576 case Iop_ShrN32x2:
2577 case Iop_SarN8x8:
2578 case Iop_SarN16x4:
2579 case Iop_SarN32x2:
2580 case Iop_ShlN16x4:
2581 case Iop_ShlN32x2:
2582 case Iop_ShlN8x8:
2583 /* Same scheme as with all other shifts. */
2584 complainIfUndefined(mce, atom2, NULL);
2585 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2586
2587 case Iop_QNarrowBin32Sto16Sx4:
2588 case Iop_QNarrowBin16Sto8Sx8:
2589 case Iop_QNarrowBin16Sto8Ux8:
2590 return vectorNarrowBin64(mce, op, vatom1, vatom2);
2591
2592 case Iop_Min8Ux8:
2593 case Iop_Min8Sx8:
2594 case Iop_Max8Ux8:
2595 case Iop_Max8Sx8:
2596 case Iop_Avg8Ux8:
2597 case Iop_QSub8Sx8:
2598 case Iop_QSub8Ux8:
2599 case Iop_Sub8x8:
2600 case Iop_CmpGT8Sx8:
2601 case Iop_CmpGT8Ux8:
2602 case Iop_CmpEQ8x8:
2603 case Iop_QAdd8Sx8:
2604 case Iop_QAdd8Ux8:
2605 case Iop_QSal8x8:
2606 case Iop_QShl8x8:
2607 case Iop_Add8x8:
2608 case Iop_Mul8x8:
2609 case Iop_PolynomialMul8x8:
2610 return binary8Ix8(mce, vatom1, vatom2);
2611
2612 case Iop_Min16Sx4:
2613 case Iop_Min16Ux4:
2614 case Iop_Max16Sx4:
2615 case Iop_Max16Ux4:
2616 case Iop_Avg16Ux4:
2617 case Iop_QSub16Ux4:
2618 case Iop_QSub16Sx4:
2619 case Iop_Sub16x4:
2620 case Iop_Mul16x4:
2621 case Iop_MulHi16Sx4:
2622 case Iop_MulHi16Ux4:
2623 case Iop_CmpGT16Sx4:
2624 case Iop_CmpGT16Ux4:
2625 case Iop_CmpEQ16x4:
2626 case Iop_QAdd16Sx4:
2627 case Iop_QAdd16Ux4:
2628 case Iop_QSal16x4:
2629 case Iop_QShl16x4:
2630 case Iop_Add16x4:
2631 case Iop_QDMulHi16Sx4:
2632 case Iop_QRDMulHi16Sx4:
2633 return binary16Ix4(mce, vatom1, vatom2);
2634
2635 case Iop_Sub32x2:
2636 case Iop_Mul32x2:
2637 case Iop_Max32Sx2:
2638 case Iop_Max32Ux2:
2639 case Iop_Min32Sx2:
2640 case Iop_Min32Ux2:
2641 case Iop_CmpGT32Sx2:
2642 case Iop_CmpGT32Ux2:
2643 case Iop_CmpEQ32x2:
2644 case Iop_Add32x2:
2645 case Iop_QAdd32Ux2:
2646 case Iop_QAdd32Sx2:
2647 case Iop_QSub32Ux2:
2648 case Iop_QSub32Sx2:
2649 case Iop_QSal32x2:
2650 case Iop_QShl32x2:
2651 case Iop_QDMulHi32Sx2:
2652 case Iop_QRDMulHi32Sx2:
2653 return binary32Ix2(mce, vatom1, vatom2);
2654
2655 case Iop_QSub64Ux1:
2656 case Iop_QSub64Sx1:
2657 case Iop_QAdd64Ux1:
2658 case Iop_QAdd64Sx1:
2659 case Iop_QSal64x1:
2660 case Iop_QShl64x1:
2661 case Iop_Sal64x1:
2662 return binary64Ix1(mce, vatom1, vatom2);
2663
2664 case Iop_QShlN8Sx8:
2665 case Iop_QShlN8x8:
2666 case Iop_QSalN8x8:
2667 complainIfUndefined(mce, atom2, NULL);
2668 return mkPCast8x8(mce, vatom1);
2669
2670 case Iop_QShlN16Sx4:
2671 case Iop_QShlN16x4:
2672 case Iop_QSalN16x4:
2673 complainIfUndefined(mce, atom2, NULL);
2674 return mkPCast16x4(mce, vatom1);
2675
2676 case Iop_QShlN32Sx2:
2677 case Iop_QShlN32x2:
2678 case Iop_QSalN32x2:
2679 complainIfUndefined(mce, atom2, NULL);
2680 return mkPCast32x2(mce, vatom1);
2681
2682 case Iop_QShlN64Sx1:
2683 case Iop_QShlN64x1:
2684 case Iop_QSalN64x1:
2685 complainIfUndefined(mce, atom2, NULL);
2686 return mkPCast32x2(mce, vatom1);
2687
2688 case Iop_PwMax32Sx2:
2689 case Iop_PwMax32Ux2:
2690 case Iop_PwMin32Sx2:
2691 case Iop_PwMin32Ux2:
2692 case Iop_PwMax32Fx2:
2693 case Iop_PwMin32Fx2:
2694 return assignNew('V', mce, Ity_I64,
2695 binop(Iop_PwMax32Ux2,
2696 mkPCast32x2(mce, vatom1),
2697 mkPCast32x2(mce, vatom2)));
2698
2699 case Iop_PwMax16Sx4:
2700 case Iop_PwMax16Ux4:
2701 case Iop_PwMin16Sx4:
2702 case Iop_PwMin16Ux4:
2703 return assignNew('V', mce, Ity_I64,
2704 binop(Iop_PwMax16Ux4,
2705 mkPCast16x4(mce, vatom1),
2706 mkPCast16x4(mce, vatom2)));
2707
2708 case Iop_PwMax8Sx8:
2709 case Iop_PwMax8Ux8:
2710 case Iop_PwMin8Sx8:
2711 case Iop_PwMin8Ux8:
2712 return assignNew('V', mce, Ity_I64,
2713 binop(Iop_PwMax8Ux8,
2714 mkPCast8x8(mce, vatom1),
2715 mkPCast8x8(mce, vatom2)));
2716
2717 case Iop_PwAdd32x2:
2718 case Iop_PwAdd32Fx2:
2719 return mkPCast32x2(mce,
2720 assignNew('V', mce, Ity_I64,
2721 binop(Iop_PwAdd32x2,
2722 mkPCast32x2(mce, vatom1),
2723 mkPCast32x2(mce, vatom2))));
2724
2725 case Iop_PwAdd16x4:
2726 return mkPCast16x4(mce,
2727 assignNew('V', mce, Ity_I64,
2728 binop(op, mkPCast16x4(mce, vatom1),
2729 mkPCast16x4(mce, vatom2))));
2730
2731 case Iop_PwAdd8x8:
2732 return mkPCast8x8(mce,
2733 assignNew('V', mce, Ity_I64,
2734 binop(op, mkPCast8x8(mce, vatom1),
2735 mkPCast8x8(mce, vatom2))));
2736
2737 case Iop_Shl8x8:
2738 case Iop_Shr8x8:
2739 case Iop_Sar8x8:
2740 case Iop_Sal8x8:
2741 return mkUifU64(mce,
2742 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2743 mkPCast8x8(mce,vatom2)
2744 );
2745
2746 case Iop_Shl16x4:
2747 case Iop_Shr16x4:
2748 case Iop_Sar16x4:
2749 case Iop_Sal16x4:
2750 return mkUifU64(mce,
2751 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2752 mkPCast16x4(mce,vatom2)
2753 );
2754
2755 case Iop_Shl32x2:
2756 case Iop_Shr32x2:
2757 case Iop_Sar32x2:
2758 case Iop_Sal32x2:
2759 return mkUifU64(mce,
2760 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2761 mkPCast32x2(mce,vatom2)
2762 );
2763
2764 /* 64-bit data-steering */
2765 case Iop_InterleaveLO32x2:
2766 case Iop_InterleaveLO16x4:
2767 case Iop_InterleaveLO8x8:
2768 case Iop_InterleaveHI32x2:
2769 case Iop_InterleaveHI16x4:
2770 case Iop_InterleaveHI8x8:
2771 case Iop_CatOddLanes8x8:
2772 case Iop_CatEvenLanes8x8:
2773 case Iop_CatOddLanes16x4:
2774 case Iop_CatEvenLanes16x4:
2775 case Iop_InterleaveOddLanes8x8:
2776 case Iop_InterleaveEvenLanes8x8:
2777 case Iop_InterleaveOddLanes16x4:
2778 case Iop_InterleaveEvenLanes16x4:
2779 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2780
2781 case Iop_GetElem8x8:
2782 complainIfUndefined(mce, atom2, NULL);
2783 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2784 case Iop_GetElem16x4:
2785 complainIfUndefined(mce, atom2, NULL);
2786 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2787 case Iop_GetElem32x2:
2788 complainIfUndefined(mce, atom2, NULL);
2789 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2790
2791 /* Perm8x8: rearrange values in left arg using steering values
2792 from right arg. So rearrange the vbits in the same way but
2793 pessimise wrt steering values. */
2794 case Iop_Perm8x8:
2795 return mkUifU64(
2796 mce,
2797 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2798 mkPCast8x8(mce, vatom2)
2799 );
2800
2801 /* V128-bit SIMD */
2802
2803 case Iop_ShrN8x16:
2804 case Iop_ShrN16x8:
2805 case Iop_ShrN32x4:
2806 case Iop_ShrN64x2:
2807 case Iop_SarN8x16:
2808 case Iop_SarN16x8:
2809 case Iop_SarN32x4:
2810 case Iop_SarN64x2:
2811 case Iop_ShlN8x16:
2812 case Iop_ShlN16x8:
2813 case Iop_ShlN32x4:
2814 case Iop_ShlN64x2:
2815 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2816 this is wrong now, scalar shifts are done properly lazily.
2817 Vector shifts should be fixed too. */
2818 complainIfUndefined(mce, atom2, NULL);
2819 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2820
2821 /* V x V shifts/rotates are done using the standard lazy scheme. */
2822 case Iop_Shl8x16:
2823 case Iop_Shr8x16:
2824 case Iop_Sar8x16:
2825 case Iop_Sal8x16:
2826 case Iop_Rol8x16:
2827 return mkUifUV128(mce,
2828 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2829 mkPCast8x16(mce,vatom2)
2830 );
2831
2832 case Iop_Shl16x8:
2833 case Iop_Shr16x8:
2834 case Iop_Sar16x8:
2835 case Iop_Sal16x8:
2836 case Iop_Rol16x8:
2837 return mkUifUV128(mce,
2838 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2839 mkPCast16x8(mce,vatom2)
2840 );
2841
2842 case Iop_Shl32x4:
2843 case Iop_Shr32x4:
2844 case Iop_Sar32x4:
2845 case Iop_Sal32x4:
2846 case Iop_Rol32x4:
2847 return mkUifUV128(mce,
2848 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2849 mkPCast32x4(mce,vatom2)
2850 );
2851
2852 case Iop_Shl64x2:
2853 case Iop_Shr64x2:
2854 case Iop_Sar64x2:
2855 case Iop_Sal64x2:
2856 return mkUifUV128(mce,
2857 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2858 mkPCast64x2(mce,vatom2)
2859 );
2860
2861 case Iop_F32ToFixed32Ux4_RZ:
2862 case Iop_F32ToFixed32Sx4_RZ:
2863 case Iop_Fixed32UToF32x4_RN:
2864 case Iop_Fixed32SToF32x4_RN:
2865 complainIfUndefined(mce, atom2, NULL);
2866 return mkPCast32x4(mce, vatom1);
2867
2868 case Iop_F32ToFixed32Ux2_RZ:
2869 case Iop_F32ToFixed32Sx2_RZ:
2870 case Iop_Fixed32UToF32x2_RN:
2871 case Iop_Fixed32SToF32x2_RN:
2872 complainIfUndefined(mce, atom2, NULL);
2873 return mkPCast32x2(mce, vatom1);
2874
2875 case Iop_QSub8Ux16:
2876 case Iop_QSub8Sx16:
2877 case Iop_Sub8x16:
2878 case Iop_Min8Ux16:
2879 case Iop_Min8Sx16:
2880 case Iop_Max8Ux16:
2881 case Iop_Max8Sx16:
2882 case Iop_CmpGT8Sx16:
2883 case Iop_CmpGT8Ux16:
2884 case Iop_CmpEQ8x16:
2885 case Iop_Avg8Ux16:
2886 case Iop_Avg8Sx16:
2887 case Iop_QAdd8Ux16:
2888 case Iop_QAdd8Sx16:
2889 case Iop_QSal8x16:
2890 case Iop_QShl8x16:
2891 case Iop_Add8x16:
2892 case Iop_Mul8x16:
2893 case Iop_PolynomialMul8x16:
2894 return binary8Ix16(mce, vatom1, vatom2);
2895
2896 case Iop_QSub16Ux8:
2897 case Iop_QSub16Sx8:
2898 case Iop_Sub16x8:
2899 case Iop_Mul16x8:
2900 case Iop_MulHi16Sx8:
2901 case Iop_MulHi16Ux8:
2902 case Iop_Min16Sx8:
2903 case Iop_Min16Ux8:
2904 case Iop_Max16Sx8:
2905 case Iop_Max16Ux8:
2906 case Iop_CmpGT16Sx8:
2907 case Iop_CmpGT16Ux8:
2908 case Iop_CmpEQ16x8:
2909 case Iop_Avg16Ux8:
2910 case Iop_Avg16Sx8:
2911 case Iop_QAdd16Ux8:
2912 case Iop_QAdd16Sx8:
2913 case Iop_QSal16x8:
2914 case Iop_QShl16x8:
2915 case Iop_Add16x8:
2916 case Iop_QDMulHi16Sx8:
2917 case Iop_QRDMulHi16Sx8:
2918 return binary16Ix8(mce, vatom1, vatom2);
2919
2920 case Iop_Sub32x4:
2921 case Iop_CmpGT32Sx4:
2922 case Iop_CmpGT32Ux4:
2923 case Iop_CmpEQ32x4:
2924 case Iop_QAdd32Sx4:
2925 case Iop_QAdd32Ux4:
2926 case Iop_QSub32Sx4:
2927 case Iop_QSub32Ux4:
2928 case Iop_QSal32x4:
2929 case Iop_QShl32x4:
2930 case Iop_Avg32Ux4:
2931 case Iop_Avg32Sx4:
2932 case Iop_Add32x4:
2933 case Iop_Max32Ux4:
2934 case Iop_Max32Sx4:
2935 case Iop_Min32Ux4:
2936 case Iop_Min32Sx4:
2937 case Iop_Mul32x4:
2938 case Iop_QDMulHi32Sx4:
2939 case Iop_QRDMulHi32Sx4:
2940 return binary32Ix4(mce, vatom1, vatom2);
2941
2942 case Iop_Sub64x2:
2943 case Iop_Add64x2:
2944 case Iop_CmpEQ64x2:
2945 case Iop_CmpGT64Sx2:
2946 case Iop_QSal64x2:
2947 case Iop_QShl64x2:
2948 case Iop_QAdd64Ux2:
2949 case Iop_QAdd64Sx2:
2950 case Iop_QSub64Ux2:
2951 case Iop_QSub64Sx2:
2952 return binary64Ix2(mce, vatom1, vatom2);
2953
2954 case Iop_QNarrowBin32Sto16Sx8:
2955 case Iop_QNarrowBin32Uto16Ux8:
2956 case Iop_QNarrowBin32Sto16Ux8:
2957 case Iop_QNarrowBin16Sto8Sx16:
2958 case Iop_QNarrowBin16Uto8Ux16:
2959 case Iop_QNarrowBin16Sto8Ux16:
2960 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
2961
2962 case Iop_Sub64Fx2:
2963 case Iop_Mul64Fx2:
2964 case Iop_Min64Fx2:
2965 case Iop_Max64Fx2:
2966 case Iop_Div64Fx2:
2967 case Iop_CmpLT64Fx2:
2968 case Iop_CmpLE64Fx2:
2969 case Iop_CmpEQ64Fx2:
2970 case Iop_CmpUN64Fx2:
2971 case Iop_Add64Fx2:
2972 return binary64Fx2(mce, vatom1, vatom2);
2973
2974 case Iop_Sub64F0x2:
2975 case Iop_Mul64F0x2:
2976 case Iop_Min64F0x2:
2977 case Iop_Max64F0x2:
2978 case Iop_Div64F0x2:
2979 case Iop_CmpLT64F0x2:
2980 case Iop_CmpLE64F0x2:
2981 case Iop_CmpEQ64F0x2:
2982 case Iop_CmpUN64F0x2:
2983 case Iop_Add64F0x2:
2984 return binary64F0x2(mce, vatom1, vatom2);
2985
2986 case Iop_Sub32Fx4:
2987 case Iop_Mul32Fx4:
2988 case Iop_Min32Fx4:
2989 case Iop_Max32Fx4:
2990 case Iop_Div32Fx4:
2991 case Iop_CmpLT32Fx4:
2992 case Iop_CmpLE32Fx4:
2993 case Iop_CmpEQ32Fx4:
2994 case Iop_CmpUN32Fx4:
2995 case Iop_CmpGT32Fx4:
2996 case Iop_CmpGE32Fx4:
2997 case Iop_Add32Fx4:
2998 case Iop_Recps32Fx4:
2999 case Iop_Rsqrts32Fx4:
3000 return binary32Fx4(mce, vatom1, vatom2);
3001
3002 case Iop_Sub32Fx2:
3003 case Iop_Mul32Fx2:
3004 case Iop_Min32Fx2:
3005 case Iop_Max32Fx2:
3006 case Iop_CmpEQ32Fx2:
3007 case Iop_CmpGT32Fx2:
3008 case Iop_CmpGE32Fx2:
3009 case Iop_Add32Fx2:
3010 case Iop_Recps32Fx2:
3011 case Iop_Rsqrts32Fx2:
3012 return binary32Fx2(mce, vatom1, vatom2);
3013
3014 case Iop_Sub32F0x4:
3015 case Iop_Mul32F0x4:
3016 case Iop_Min32F0x4:
3017 case Iop_Max32F0x4:
3018 case Iop_Div32F0x4:
3019 case Iop_CmpLT32F0x4:
3020 case Iop_CmpLE32F0x4:
3021 case Iop_CmpEQ32F0x4:
3022 case Iop_CmpUN32F0x4:
3023 case Iop_Add32F0x4:
3024 return binary32F0x4(mce, vatom1, vatom2);
3025
3026 case Iop_QShlN8Sx16:
3027 case Iop_QShlN8x16:
3028 case Iop_QSalN8x16:
3029 complainIfUndefined(mce, atom2, NULL);
3030 return mkPCast8x16(mce, vatom1);
3031
3032 case Iop_QShlN16Sx8:
3033 case Iop_QShlN16x8:
3034 case Iop_QSalN16x8:
3035 complainIfUndefined(mce, atom2, NULL);
3036 return mkPCast16x8(mce, vatom1);
3037
3038 case Iop_QShlN32Sx4:
3039 case Iop_QShlN32x4:
3040 case Iop_QSalN32x4:
3041 complainIfUndefined(mce, atom2, NULL);
3042 return mkPCast32x4(mce, vatom1);
3043
3044 case Iop_QShlN64Sx2:
3045 case Iop_QShlN64x2:
3046 case Iop_QSalN64x2:
3047 complainIfUndefined(mce, atom2, NULL);
3048 return mkPCast32x4(mce, vatom1);
3049
3050 case Iop_Mull32Sx2:
3051 case Iop_Mull32Ux2:
3052 case Iop_QDMulLong32Sx2:
3053 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3054 mkUifU64(mce, vatom1, vatom2));
3055
3056 case Iop_Mull16Sx4:
3057 case Iop_Mull16Ux4:
3058 case Iop_QDMulLong16Sx4:
3059 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3060 mkUifU64(mce, vatom1, vatom2));
3061
3062 case Iop_Mull8Sx8:
3063 case Iop_Mull8Ux8:
3064 case Iop_PolynomialMull8x8:
3065 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3066 mkUifU64(mce, vatom1, vatom2));
3067
3068 case Iop_PwAdd32x4:
3069 return mkPCast32x4(mce,
3070 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3071 mkPCast32x4(mce, vatom2))));
3072
3073 case Iop_PwAdd16x8:
3074 return mkPCast16x8(mce,
3075 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3076 mkPCast16x8(mce, vatom2))));
3077
3078 case Iop_PwAdd8x16:
3079 return mkPCast8x16(mce,
3080 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3081 mkPCast8x16(mce, vatom2))));
3082
3083 /* V128-bit data-steering */
3084 case Iop_SetV128lo32:
3085 case Iop_SetV128lo64:
3086 case Iop_64HLtoV128:
3087 case Iop_InterleaveLO64x2:
3088 case Iop_InterleaveLO32x4:
3089 case Iop_InterleaveLO16x8:
3090 case Iop_InterleaveLO8x16:
3091 case Iop_InterleaveHI64x2:
3092 case Iop_InterleaveHI32x4:
3093 case Iop_InterleaveHI16x8:
3094 case Iop_InterleaveHI8x16:
3095 case Iop_CatOddLanes8x16:
3096 case Iop_CatOddLanes16x8:
3097 case Iop_CatOddLanes32x4:
3098 case Iop_CatEvenLanes8x16:
3099 case Iop_CatEvenLanes16x8:
3100 case Iop_CatEvenLanes32x4:
3101 case Iop_InterleaveOddLanes8x16:
3102 case Iop_InterleaveOddLanes16x8:
3103 case Iop_InterleaveOddLanes32x4:
3104 case Iop_InterleaveEvenLanes8x16:
3105 case Iop_InterleaveEvenLanes16x8:
3106 case Iop_InterleaveEvenLanes32x4:
3107 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3108
3109 case Iop_GetElem8x16:
3110 complainIfUndefined(mce, atom2, NULL);
3111 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3112 case Iop_GetElem16x8:
3113 complainIfUndefined(mce, atom2, NULL);
3114 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3115 case Iop_GetElem32x4:
3116 complainIfUndefined(mce, atom2, NULL);
3117 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3118 case Iop_GetElem64x2:
3119 complainIfUndefined(mce, atom2, NULL);
3120 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3121
3122 /* Perm8x16: rearrange values in left arg using steering values
3123 from right arg. So rearrange the vbits in the same way but
3124 pessimise wrt steering values. Perm32x4 ditto. */
3125 case Iop_Perm8x16:
3126 return mkUifUV128(
3127 mce,
3128 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3129 mkPCast8x16(mce, vatom2)
3130 );
3131 case Iop_Perm32x4:
3132 return mkUifUV128(
3133 mce,
3134 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3135 mkPCast32x4(mce, vatom2)
3136 );
3137
3138       /* These two take the even (lower) 16-bit half of each 32-bit
3139          lane, sign/zero extend it to 32, and multiply together,
3140          producing a 32x4 result (and implicitly ignoring half the
3141          operand bits).  So treat it as a bunch of independent 16x8
3142          operations, but then do 32-bit shifts left-right to copy the
3143          lower half results (which are all 0s or all 1s due to PCasting
3144          in binary16Ix8) into the upper half of each result lane. */
3145 case Iop_MullEven16Ux8:
3146 case Iop_MullEven16Sx8: {
3147 IRAtom* at;
3148 at = binary16Ix8(mce,vatom1,vatom2);
3149 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3150 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3151 return at;
3152 }
3153
3154 /* Same deal as Iop_MullEven16{S,U}x8 */
3155 case Iop_MullEven8Ux16:
3156 case Iop_MullEven8Sx16: {
3157 IRAtom* at;
3158 at = binary8Ix16(mce,vatom1,vatom2);
3159 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3160 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3161 return at;
3162 }
3163
3164 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3165 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3166          Simply apply the same op to the V bits, since this is really no
3167          more than a data-steering operation. */
3168 case Iop_NarrowBin32to16x8:
3169 case Iop_NarrowBin16to8x16:
3170 return assignNew('V', mce, Ity_V128,
3171 binop(op, vatom1, vatom2));
3172
3173 case Iop_ShrV128:
3174 case Iop_ShlV128:
3175 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3176 this is wrong now, scalar shifts are done properly lazily.
3177 Vector shifts should be fixed too. */
3178 complainIfUndefined(mce, atom2, NULL);
3179 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3180
3181 /* I128-bit data-steering */
3182 case Iop_64HLto128:
3183 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3184
3185 /* V256-bit SIMD */
3186
3187 case Iop_Add64Fx4:
3188 case Iop_Sub64Fx4:
3189 case Iop_Mul64Fx4:
3190 case Iop_Div64Fx4:
3191 case Iop_Max64Fx4:
3192 case Iop_Min64Fx4:
3193 return binary64Fx4(mce, vatom1, vatom2);
3194
3195 case Iop_Add32Fx8:
3196 case Iop_Sub32Fx8:
3197 case Iop_Mul32Fx8:
3198 case Iop_Div32Fx8:
3199 case Iop_Max32Fx8:
3200 case Iop_Min32Fx8:
3201 return binary32Fx8(mce, vatom1, vatom2);
3202
3203 /* V256-bit data-steering */
3204 case Iop_V128HLtoV256:
3205 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3206
3207 /* Scalar floating point */
3208
3209 case Iop_F32toI64S:
3210 /* I32(rm) x F32 -> I64 */
3211 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3212
3213 case Iop_I64StoF32:
3214 /* I32(rm) x I64 -> F32 */
3215 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3216
3217 case Iop_RoundF64toInt:
3218 case Iop_RoundF64toF32:
3219 case Iop_F64toI64S:
3220 case Iop_F64toI64U:
3221 case Iop_I64StoF64:
3222 case Iop_I64UtoF64:
3223 case Iop_SinF64:
3224 case Iop_CosF64:
3225 case Iop_TanF64:
3226 case Iop_2xm1F64:
3227 case Iop_SqrtF64:
3228 /* I32(rm) x I64/F64 -> I64/F64 */
3229 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3230
3231 case Iop_ShlD64:
3232 case Iop_ShrD64:
3233 case Iop_RoundD64toInt:
3234 /* I32(DFP rm) x D64 -> D64 */
3235 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3236
3237 case Iop_ShlD128:
3238 case Iop_ShrD128:
3239 case Iop_RoundD128toInt:
3240 /* I32(DFP rm) x D128 -> D128 */
3241 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3242
3243 case Iop_D64toI64S:
3244 case Iop_I64StoD64:
3245 /* I64(DFP rm) x I64 -> D64 */
3246 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3247
3248 case Iop_RoundF32toInt:
3249 case Iop_SqrtF32:
3250 /* I32(rm) x I32/F32 -> I32/F32 */
3251 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3252
3253 case Iop_SqrtF128:
3254 /* I32(rm) x F128 -> F128 */
3255 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3256
3257 case Iop_I32StoF32:
3258 case Iop_F32toI32S:
3259 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3260 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3261
3262 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
3263 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3264 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3265
3266 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
3267 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
3268 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */
3269 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */
3270 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3271
3272 case Iop_F64HLtoF128:
3273 case Iop_D64HLtoD128:
3274 return assignNew('V', mce, Ity_I128,
3275 binop(Iop_64HLto128, vatom1, vatom2));
3276
3277 case Iop_F64toI32U:
3278 case Iop_F64toI32S:
3279 case Iop_F64toF32:
3280 case Iop_I64UtoF32:
3281 /* First arg is I32 (rounding mode), second is F64 (data). */
3282 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3283
3284 case Iop_D64toD32:
3285          /* First arg is I64 (DFP rounding mode), second is D64 (data). */
3286 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3287
3288 case Iop_F64toI16S:
3289 /* First arg is I32 (rounding mode), second is F64 (data). */
3290 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3291
3292 case Iop_InsertExpD64:
3293 /* I64 x I64 -> D64 */
3294 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3295
3296 case Iop_InsertExpD128:
3297 /* I64 x I128 -> D128 */
3298 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3299
3300 case Iop_CmpF32:
3301 case Iop_CmpF64:
3302 case Iop_CmpF128:
3303 case Iop_CmpD64:
3304 case Iop_CmpD128:
3305 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3306
3307 /* non-FP after here */
3308
3309 case Iop_DivModU64to32:
3310 case Iop_DivModS64to32:
3311 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3312
3313 case Iop_DivModU128to64:
3314 case Iop_DivModS128to64:
3315 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3316
3317 case Iop_16HLto32:
3318 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3319 case Iop_32HLto64:
3320 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3321
3322 case Iop_DivModS64to64:
3323 case Iop_MullS64:
3324 case Iop_MullU64: {
3325 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3326 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3327 return assignNew('V', mce, Ity_I128,
3328 binop(Iop_64HLto128, vHi64, vLo64));
3329 }
3330
3331 case Iop_MullS32:
3332 case Iop_MullU32: {
3333 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3334 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3335 return assignNew('V', mce, Ity_I64,
3336 binop(Iop_32HLto64, vHi32, vLo32));
3337 }
3338
3339 case Iop_MullS16:
3340 case Iop_MullU16: {
3341 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3342 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3343 return assignNew('V', mce, Ity_I32,
3344 binop(Iop_16HLto32, vHi16, vLo16));
3345 }
3346
3347 case Iop_MullS8:
3348 case Iop_MullU8: {
3349 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3350 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3351 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3352 }
3353
3354       case Iop_Sad8Ux4: /* maybe we could do better? for the moment, do mkLazy2. */
3355 case Iop_DivS32:
3356 case Iop_DivU32:
3357 case Iop_DivU32E:
3358 case Iop_DivS32E:
3359 case Iop_QAdd32S: /* could probably do better */
3360 case Iop_QSub32S: /* could probably do better */
3361 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3362
3363 case Iop_DivS64:
3364 case Iop_DivU64:
3365 case Iop_DivS64E:
3366 case Iop_DivU64E:
3367 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3368
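      /* Add/Sub: two schemes are available.  The cheap one below
         approximates carry propagation with mkLeft(UifU(v1,v2)), so
         undefinedness smears from the lowest undefined bit upwards.
         The expensive one (expensiveAddSub, defined earlier in this
         file) tracks the interaction of defined and undefined bits
         more precisely, and is selected only when the superblock
         appears to contain dodgy ("bogus") literals, or, for Add,
         when mce->useLLVMworkarounds is set. */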
3369 case Iop_Add32:
3370 if (mce->bogusLiterals || mce->useLLVMworkarounds)
3371 return expensiveAddSub(mce,True,Ity_I32,
3372 vatom1,vatom2, atom1,atom2);
3373 else
3374 goto cheap_AddSub32;
3375 case Iop_Sub32:
3376 if (mce->bogusLiterals)
3377 return expensiveAddSub(mce,False,Ity_I32,
3378 vatom1,vatom2, atom1,atom2);
3379 else
3380 goto cheap_AddSub32;
3381
3382 cheap_AddSub32:
3383 case Iop_Mul32:
3384 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3385
3386 case Iop_CmpORD32S:
3387 case Iop_CmpORD32U:
3388 case Iop_CmpORD64S:
3389 case Iop_CmpORD64U:
3390 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3391
3392 case Iop_Add64:
3393 if (mce->bogusLiterals || mce->useLLVMworkarounds)
3394 return expensiveAddSub(mce,True,Ity_I64,
3395 vatom1,vatom2, atom1,atom2);
3396 else
3397 goto cheap_AddSub64;
3398 case Iop_Sub64:
3399 if (mce->bogusLiterals)
3400 return expensiveAddSub(mce,False,Ity_I64,
3401 vatom1,vatom2, atom1,atom2);
3402 else
3403 goto cheap_AddSub64;
3404
3405 cheap_AddSub64:
3406 case Iop_Mul64:
3407 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3408
3409 case Iop_Mul16:
3410 case Iop_Add16:
3411 case Iop_Sub16:
3412 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3413
3414 case Iop_Sub8:
3415 case Iop_Add8:
3416 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3417
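      /* Equality comparisons: the cheap scheme PCasts UifU(v1,v2)
         down to Ity_I1, so the result is defined only if both
         operands are fully defined.  With bogus literals present,
         expensiveCmpEQorNE is used instead; it can deliver a defined
         result despite undefined input bits, provided the defined
         bits alone already decide the comparison. */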
3418 case Iop_CmpEQ64:
3419 case Iop_CmpNE64:
3420 if (mce->bogusLiterals)
3421 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3422 else
3423 goto cheap_cmp64;
3424 cheap_cmp64:
3425 case Iop_CmpLE64S: case Iop_CmpLE64U:
3426 case Iop_CmpLT64U: case Iop_CmpLT64S:
3427 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3428
3429 case Iop_CmpEQ32:
3430 case Iop_CmpNE32:
3431 if (mce->bogusLiterals)
3432 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3433 else
3434 goto cheap_cmp32;
3435 cheap_cmp32:
3436 case Iop_CmpLE32S: case Iop_CmpLE32U:
3437 case Iop_CmpLT32U: case Iop_CmpLT32S:
3438 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3439
3440 case Iop_CmpEQ16: case Iop_CmpNE16:
3441 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3442
3443 case Iop_CmpEQ8: case Iop_CmpNE8:
3444 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3445
3446 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3447 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3448 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3449 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3450 /* Just say these all produce a defined result, regardless
3451 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3452 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3453
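      /* Scalar shifts: scalarShift shifts the value's shadow by the
         original (unshadowed) shift amount, then UifU's in a PCast of
         the shift amount's shadow, so an undefined shift amount
         trashes the whole result. */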
3454 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3455 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3456
3457 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3458 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3459
3460 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3461 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3462
3463 case Iop_Shl8: case Iop_Shr8:
3464 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3465
3466 case Iop_AndV256:
3467 uifu = mkUifUV256; difd = mkDifDV256;
3468 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
3469 case Iop_AndV128:
3470 uifu = mkUifUV128; difd = mkDifDV128;
3471 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3472 case Iop_And64:
3473 uifu = mkUifU64; difd = mkDifD64;
3474 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3475 case Iop_And32:
3476 uifu = mkUifU32; difd = mkDifD32;
3477 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3478 case Iop_And16:
3479 uifu = mkUifU16; difd = mkDifD16;
3480 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3481 case Iop_And8:
3482 uifu = mkUifU8; difd = mkDifD8;
3483 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3484
3485 case Iop_OrV256:
3486 uifu = mkUifUV256; difd = mkDifDV256;
3487 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
3488 case Iop_OrV128:
3489 uifu = mkUifUV128; difd = mkDifDV128;
3490 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3491 case Iop_Or64:
3492 uifu = mkUifU64; difd = mkDifD64;
3493 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3494 case Iop_Or32:
3495 uifu = mkUifU32; difd = mkDifD32;
3496 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3497 case Iop_Or16:
3498 uifu = mkUifU16; difd = mkDifD16;
3499 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3500 case Iop_Or8:
3501 uifu = mkUifU8; difd = mkDifD8;
3502 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3503
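      /* And/Or can do better than plain UifU, because a defined 0
         (for And) or a defined 1 (for Or) forces the corresponding
         result bit regardless of the other operand.  The 'improve'
         terms compute, per bit, whether such forcing applies, and
         DifD then removes those bits from the pessimistic UifU
         estimate. */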
3504 do_And_Or:
3505 return
3506 assignNew(
3507 'V', mce,
3508 and_or_ty,
3509 difd(mce, uifu(mce, vatom1, vatom2),
3510 difd(mce, improve(mce, atom1, vatom1),
3511 improve(mce, atom2, vatom2) ) ) );
3512
3513 case Iop_Xor8:
3514 return mkUifU8(mce, vatom1, vatom2);
3515 case Iop_Xor16:
3516 return mkUifU16(mce, vatom1, vatom2);
3517 case Iop_Xor32:
3518 return mkUifU32(mce, vatom1, vatom2);
3519 case Iop_Xor64:
3520 return mkUifU64(mce, vatom1, vatom2);
3521 case Iop_XorV128:
3522 return mkUifUV128(mce, vatom1, vatom2);
3523 case Iop_XorV256:
3524 return mkUifUV256(mce, vatom1, vatom2);
3525
3526 default:
3527 ppIROp(op);
3528 VG_(tool_panic)("memcheck:expr2vbits_Binop");
3529 }
3530 }
3531
3532
3533 static
3534 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3535 {
3536 IRAtom* vatom = expr2vbits( mce, atom );
3537 tl_assert(isOriginalAtom(mce,atom));
3538 switch (op) {
3539
3540 case Iop_Sqrt64Fx2:
3541 return unary64Fx2(mce, vatom);
3542
3543 case Iop_Sqrt64F0x2:
3544 return unary64F0x2(mce, vatom);
3545
3546 case Iop_Sqrt32Fx8:
3547 case Iop_RSqrt32Fx8:
3548 case Iop_Recip32Fx8:
3549 return unary32Fx8(mce, vatom);
3550
3551 case Iop_Sqrt64Fx4:
3552 return unary64Fx4(mce, vatom);
3553
3554 case Iop_Sqrt32Fx4:
3555 case Iop_RSqrt32Fx4:
3556 case Iop_Recip32Fx4:
3557 case Iop_I32UtoFx4:
3558 case Iop_I32StoFx4:
3559 case Iop_QFtoI32Ux4_RZ:
3560 case Iop_QFtoI32Sx4_RZ:
3561 case Iop_RoundF32x4_RM:
3562 case Iop_RoundF32x4_RP:
3563 case Iop_RoundF32x4_RN:
3564 case Iop_RoundF32x4_RZ:
3565 case Iop_Recip32x4:
3566 case Iop_Abs32Fx4:
3567 case Iop_Neg32Fx4:
3568 case Iop_Rsqrte32Fx4:
3569 return unary32Fx4(mce, vatom);
3570
3571 case Iop_I32UtoFx2:
3572 case Iop_I32StoFx2:
3573 case Iop_Recip32Fx2:
3574 case Iop_Recip32x2:
3575 case Iop_Abs32Fx2:
3576 case Iop_Neg32Fx2:
3577 case Iop_Rsqrte32Fx2:
3578 return unary32Fx2(mce, vatom);
3579
3580 case Iop_Sqrt32F0x4:
3581 case Iop_RSqrt32F0x4:
3582 case Iop_Recip32F0x4:
3583 return unary32F0x4(mce, vatom);
3584
3585 case Iop_32UtoV128:
3586 case Iop_64UtoV128:
3587 case Iop_Dup8x16:
3588 case Iop_Dup16x8:
3589 case Iop_Dup32x4:
3590 case Iop_Reverse16_8x16:
3591 case Iop_Reverse32_8x16:
3592 case Iop_Reverse32_16x8:
3593 case Iop_Reverse64_8x16:
3594 case Iop_Reverse64_16x8:
3595 case Iop_Reverse64_32x4:
3596 case Iop_V256toV128_1: case Iop_V256toV128_0:
3597 return assignNew('V', mce, Ity_V128, unop(op, vatom));
3598
3599 case Iop_F128HItoF64: /* F128 -> high half of F128 */
3600 case Iop_D128HItoD64: /* D128 -> high half of D128 */
3601 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3602 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
3603 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
3604 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3605
3606 case Iop_NegF128:
3607 case Iop_AbsF128:
3608 return mkPCastTo(mce, Ity_I128, vatom);
3609
3610 case Iop_I32StoF128: /* signed I32 -> F128 */
3611 case Iop_I64StoF128: /* signed I64 -> F128 */
3612 case Iop_F32toF128: /* F32 -> F128 */
3613 case Iop_F64toF128: /* F64 -> F128 */
3614 case Iop_I64StoD128: /* signed I64 -> D128 */
3615 return mkPCastTo(mce, Ity_I128, vatom);
3616
3617 case Iop_F32toF64:
3618 case Iop_I32StoF64:
3619 case Iop_I32UtoF64:
3620 case Iop_NegF64:
3621 case Iop_AbsF64:
3622 case Iop_Est5FRSqrt:
3623 case Iop_RoundF64toF64_NEAREST:
3624 case Iop_RoundF64toF64_NegINF:
3625 case Iop_RoundF64toF64_PosINF:
3626 case Iop_RoundF64toF64_ZERO:
3627 case Iop_Clz64:
3628 case Iop_Ctz64:
3629 case Iop_D32toD64:
3630 case Iop_ExtractExpD64: /* D64 -> I64 */
3631 case Iop_ExtractExpD128: /* D128 -> I64 */
3632 case Iop_DPBtoBCD:
3633 case Iop_BCDtoDPB:
3634 return mkPCastTo(mce, Ity_I64, vatom);
3635
3636 case Iop_D64toD128:
3637 return mkPCastTo(mce, Ity_I128, vatom);
3638
3639 case Iop_Clz32:
3640 case Iop_Ctz32:
3641 case Iop_TruncF64asF32:
3642 case Iop_NegF32:
3643 case Iop_AbsF32:
3644 return mkPCastTo(mce, Ity_I32, vatom);
3645
3646 case Iop_1Uto64:
3647 case Iop_1Sto64:
3648 case Iop_8Uto64:
3649 case Iop_8Sto64:
3650 case Iop_16Uto64:
3651 case Iop_16Sto64:
3652 case Iop_32Sto64:
3653 case Iop_32Uto64:
3654 case Iop_V128to64:
3655 case Iop_V128HIto64:
3656 case Iop_128HIto64:
3657 case Iop_128to64:
3658 case Iop_Dup8x8:
3659 case Iop_Dup16x4:
3660 case Iop_Dup32x2:
3661 case Iop_Reverse16_8x8:
3662 case Iop_Reverse32_8x8:
3663 case Iop_Reverse32_16x4:
3664 case Iop_Reverse64_8x8:
3665 case Iop_Reverse64_16x4:
3666 case Iop_Reverse64_32x2:
3667 case Iop_V256to64_0: case Iop_V256to64_1:
3668 case Iop_V256to64_2: case Iop_V256to64_3:
3669 return assignNew('V', mce, Ity_I64, unop(op, vatom));
3670
3671 case Iop_I16StoF32:
3672 case Iop_64to32:
3673 case Iop_64HIto32:
3674 case Iop_1Uto32:
3675 case Iop_1Sto32:
3676 case Iop_8Uto32:
3677 case Iop_16Uto32:
3678 case Iop_16Sto32:
3679 case Iop_8Sto32:
3680 case Iop_V128to32:
3681 return assignNew('V', mce, Ity_I32, unop(op, vatom));
3682
3683 case Iop_8Sto16:
3684 case Iop_8Uto16:
3685 case Iop_32to16:
3686 case Iop_32HIto16:
3687 case Iop_64to16:
3688 return assignNew('V', mce, Ity_I16, unop(op, vatom));
3689
3690 case Iop_1Uto8:
3691 case Iop_1Sto8:
3692 case Iop_16to8:
3693 case Iop_16HIto8:
3694 case Iop_32to8:
3695 case Iop_64to8:
3696 return assignNew('V', mce, Ity_I8, unop(op, vatom));
3697
3698 case Iop_32to1:
3699 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3700
3701 case Iop_64to1:
3702 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3703
3704 case Iop_ReinterpF64asI64:
3705 case Iop_ReinterpI64asF64:
3706 case Iop_ReinterpI32asF32:
3707 case Iop_ReinterpF32asI32:
3708 case Iop_ReinterpI64asD64:
3709 case Iop_ReinterpD64asI64:
3710 case Iop_NotV256:
3711 case Iop_NotV128:
3712 case Iop_Not64:
3713 case Iop_Not32:
3714 case Iop_Not16:
3715 case Iop_Not8:
3716 case Iop_Not1:
3717 return vatom;
3718
3719 case Iop_CmpNEZ8x8:
3720 case Iop_Cnt8x8:
3721 case Iop_Clz8Sx8:
3722 case Iop_Cls8Sx8:
3723 case Iop_Abs8x8:
3724 return mkPCast8x8(mce, vatom);
3725
3726 case Iop_CmpNEZ8x16:
3727 case Iop_Cnt8x16:
3728 case Iop_Clz8Sx16:
3729 case Iop_Cls8Sx16:
3730 case Iop_Abs8x16:
3731 return mkPCast8x16(mce, vatom);
3732
3733 case Iop_CmpNEZ16x4:
3734 case Iop_Clz16Sx4:
3735 case Iop_Cls16Sx4:
3736 case Iop_Abs16x4:
3737 return mkPCast16x4(mce, vatom);
3738
3739 case Iop_CmpNEZ16x8:
3740 case Iop_Clz16Sx8:
3741 case Iop_Cls16Sx8:
3742 case Iop_Abs16x8:
3743 return mkPCast16x8(mce, vatom);
3744
3745 case Iop_CmpNEZ32x2:
3746 case Iop_Clz32Sx2:
3747 case Iop_Cls32Sx2:
3748 case Iop_FtoI32Ux2_RZ:
3749 case Iop_FtoI32Sx2_RZ:
3750 case Iop_Abs32x2:
3751 return mkPCast32x2(mce, vatom);
3752
3753 case Iop_CmpNEZ32x4:
3754 case Iop_Clz32Sx4:
3755 case Iop_Cls32Sx4:
3756 case Iop_FtoI32Ux4_RZ:
3757 case Iop_FtoI32Sx4_RZ:
3758 case Iop_Abs32x4:
3759 return mkPCast32x4(mce, vatom);
3760
3761 case Iop_CmpwNEZ64:
3762 return mkPCastTo(mce, Ity_I64, vatom);
3763
3764 case Iop_CmpNEZ64x2:
3765 return mkPCast64x2(mce, vatom);
3766
3767 case Iop_NarrowUn16to8x8:
3768 case Iop_NarrowUn32to16x4:
3769 case Iop_NarrowUn64to32x2:
3770 case Iop_QNarrowUn16Sto8Sx8:
3771 case Iop_QNarrowUn16Sto8Ux8:
3772 case Iop_QNarrowUn16Uto8Ux8:
3773 case Iop_QNarrowUn32Sto16Sx4:
3774 case Iop_QNarrowUn32Sto16Ux4:
3775 case Iop_QNarrowUn32Uto16Ux4:
3776 case Iop_QNarrowUn64Sto32Sx2:
3777 case Iop_QNarrowUn64Sto32Ux2:
3778 case Iop_QNarrowUn64Uto32Ux2:
3779 return vectorNarrowUnV128(mce, op, vatom);
3780
3781 case Iop_Widen8Sto16x8:
3782 case Iop_Widen8Uto16x8:
3783 case Iop_Widen16Sto32x4:
3784 case Iop_Widen16Uto32x4:
3785 case Iop_Widen32Sto64x2:
3786 case Iop_Widen32Uto64x2:
3787 return vectorWidenI64(mce, op, vatom);
3788
3789 case Iop_PwAddL32Ux2:
3790 case Iop_PwAddL32Sx2:
3791 return mkPCastTo(mce, Ity_I64,
3792 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3793
3794 case Iop_PwAddL16Ux4:
3795 case Iop_PwAddL16Sx4:
3796 return mkPCast32x2(mce,
3797 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3798
3799 case Iop_PwAddL8Ux8:
3800 case Iop_PwAddL8Sx8:
3801 return mkPCast16x4(mce,
3802 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3803
3804 case Iop_PwAddL32Ux4:
3805 case Iop_PwAddL32Sx4:
3806 return mkPCast64x2(mce,
3807 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3808
3809 case Iop_PwAddL16Ux8:
3810 case Iop_PwAddL16Sx8:
3811 return mkPCast32x4(mce,
3812 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3813
3814 case Iop_PwAddL8Ux16:
3815 case Iop_PwAddL8Sx16:
3816 return mkPCast16x8(mce,
3817 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3818
3819 case Iop_I64UtoF32:
3820 default:
3821 ppIROp(op);
3822 VG_(tool_panic)("memcheck:expr2vbits_Unop");
3823 }
3824 }
3825
3826
3827 /* Worker function; do not call directly. */
3828 static
3829 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3830 IREndness end, IRType ty,
3831 IRAtom* addr, UInt bias )
3832 {
3833 void* helper;
3834 Char* hname;
3835 IRDirty* di;
3836 IRTemp datavbits;
3837 IRAtom* addrAct;
3838
3839 tl_assert(isOriginalAtom(mce,addr));
3840 tl_assert(end == Iend_LE || end == Iend_BE);
3841
3842 /* First, emit a definedness test for the address. This also sets
3843 the address (shadow) to 'defined' following the test. */
3844 complainIfUndefined( mce, addr, NULL );
3845
3846 /* Now cook up a call to the relevant helper function, to read the
3847 data V bits from shadow memory. */
3848 ty = shadowTypeV(ty);
3849
3850 if (end == Iend_LE) {
3851 switch (ty) {
3852 case Ity_I64: helper = &MC_(helperc_LOADV64le);
3853 hname = "MC_(helperc_LOADV64le)";
3854 break;
3855 case Ity_I32: helper = &MC_(helperc_LOADV32le);
3856 hname = "MC_(helperc_LOADV32le)";
3857 break;
3858 case Ity_I16: helper = &MC_(helperc_LOADV16le);
3859 hname = "MC_(helperc_LOADV16le)";
3860 break;
3861 case Ity_I8: helper = &MC_(helperc_LOADV8);
3862 hname = "MC_(helperc_LOADV8)";
3863 break;
3864 default: ppIRType(ty);
3865 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3866 }
3867 } else {
3868 switch (ty) {
3869 case Ity_I64: helper = &MC_(helperc_LOADV64be);
3870 hname = "MC_(helperc_LOADV64be)";
3871 break;
3872 case Ity_I32: helper = &MC_(helperc_LOADV32be);
3873 hname = "MC_(helperc_LOADV32be)";
3874 break;
3875 case Ity_I16: helper = &MC_(helperc_LOADV16be);
3876 hname = "MC_(helperc_LOADV16be)";
3877 break;
3878 case Ity_I8: helper = &MC_(helperc_LOADV8);
3879 hname = "MC_(helperc_LOADV8)";
3880 break;
3881 default: ppIRType(ty);
3882 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3883 }
3884 }
3885
3886 /* Generate the actual address into addrAct. */
3887 if (bias == 0) {
3888 addrAct = addr;
3889 } else {
3890 IROp mkAdd;
3891 IRAtom* eBias;
3892 IRType tyAddr = mce->hWordTy;
3893 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3894 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3895 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3896 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3897 }
3898
3899 /* We need to have a place to park the V bits we're just about to
3900 read. */
3901 datavbits = newTemp(mce, ty, VSh);
3902 di = unsafeIRDirty_1_N( datavbits,
3903 1/*regparms*/,
3904 hname, VG_(fnptr_to_fnentry)( helper ),
3905 mkIRExprVec_1( addrAct ));
3906 setHelperAnns( mce, di );
3907 stmt( 'V', mce, IRStmt_Dirty(di) );
3908
3909 return mkexpr(datavbits);
3910 }
3911
3912
3913 static
3914 IRAtom* expr2vbits_Load ( MCEnv* mce,
3915 IREndness end, IRType ty,
3916 IRAtom* addr, UInt bias )
3917 {
3918 tl_assert(end == Iend_LE || end == Iend_BE);
3919 switch (shadowTypeV(ty)) {
3920 case Ity_I8:
3921 case Ity_I16:
3922 case Ity_I32:
3923 case Ity_I64:
3924 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3925 case Ity_V128: {
3926 IRAtom *v64hi, *v64lo;
3927 if (end == Iend_LE) {
3928 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3929 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3930 } else {
3931 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3932 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3933 }
3934 return assignNew( 'V', mce,
3935 Ity_V128,
3936 binop(Iop_64HLtoV128, v64hi, v64lo));
3937 }
3938 case Ity_V256: {
3939 /* V256-bit case -- phrased in terms of 64 bit units (Qs),
3940 with Q3 being the most significant lane. */
3941 if (end == Iend_BE) goto unhandled;
3942 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3943 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3944 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
3945 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
3946 return assignNew( 'V', mce,
3947 Ity_V256,
3948 IRExpr_Qop(Iop_64x4toV256,
3949 v64Q3, v64Q2, v64Q1, v64Q0));
3950 }
3951 unhandled:
3952 default:
3953 VG_(tool_panic)("expr2vbits_Load");
3954 }
3955 }
3956
3957
3958 /* If there is no guard expression, or the guard is always TRUE, this function
3959 behaves like expr2vbits_Load. If the guard is not true at runtime, an
3960 all-bits-defined bit pattern will be returned.
3961 It is assumed that definedness of GUARD has already been checked at the call
3962 site. */
3963 static
3964 IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
3965 IREndness end, IRType ty,
3966 IRAtom* addr, UInt bias, IRAtom *guard )
3967 {
3968 if (guard) {
3969 IRAtom *cond, *iffalse, *iftrue;
3970
3971 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
3972 iftrue = assignNew('V', mce, ty,
3973 expr2vbits_Load(mce, end, ty, addr, bias));
3974 iffalse = assignNew('V', mce, ty, definedOfType(ty));
3975
3976 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
3977 }
3978
3979 /* No guard expression or unconditional load */
3980 return expr2vbits_Load(mce, end, ty, addr, bias);
3981 }
3982
3983
3984 static
3985 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3986 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3987 {
3988 IRAtom *vbitsC, *vbits0, *vbitsX;
3989 IRType ty;
3990 /* Given Mux0X(cond,expr0,exprX), generate
3991 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3992 That is, steer the V bits like the originals, but trash the
3993 result if the steering value is undefined. This gives
3994 lazy propagation. */
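   /* Concretely: if cond# is fully defined, PCast(cond#) contributes
      nothing and the result shadow is just that of the selected arm;
      if cond# is undefined, PCast(cond#) is all ones and the whole
      result is marked undefined. */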
3995 tl_assert(isOriginalAtom(mce, cond));
3996 tl_assert(isOriginalAtom(mce, expr0));
3997 tl_assert(isOriginalAtom(mce, exprX));
3998
3999 vbitsC = expr2vbits(mce, cond);
4000 vbits0 = expr2vbits(mce, expr0);
4001 vbitsX = expr2vbits(mce, exprX);
4002 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4003
4004 return
4005 mkUifU(mce, ty, assignNew('V', mce, ty,
4006 IRExpr_Mux0X(cond, vbits0, vbitsX)),
4007 mkPCastTo(mce, ty, vbitsC) );
4008 }
4009
4010 /* --------- This is the main expression-handling function. --------- */
4011
4012 static
4013 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4014 {
4015 switch (e->tag) {
4016
4017 case Iex_Get:
4018 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4019
4020 case Iex_GetI:
4021 return shadow_GETI( mce, e->Iex.GetI.descr,
4022 e->Iex.GetI.ix, e->Iex.GetI.bias );
4023
4024 case Iex_RdTmp:
4025 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4026
4027 case Iex_Const:
4028 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4029
4030 case Iex_Qop:
4031 return expr2vbits_Qop(
4032 mce,
4033 e->Iex.Qop.details->op,
4034 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4035 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4036 );
4037
4038 case Iex_Triop:
4039 return expr2vbits_Triop(
4040 mce,
4041 e->Iex.Triop.details->op,
4042 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4043 e->Iex.Triop.details->arg3
4044 );
4045
4046 case Iex_Binop:
4047 return expr2vbits_Binop(
4048 mce,
4049 e->Iex.Binop.op,
4050 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4051 );
4052
4053 case Iex_Unop:
4054 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4055
4056 case Iex_Load:
4057 return expr2vbits_Load( mce, e->Iex.Load.end,
4058 e->Iex.Load.ty,
4059 e->Iex.Load.addr, 0/*addr bias*/ );
4060
4061 case Iex_CCall:
4062 return mkLazyN( mce, e->Iex.CCall.args,
4063 e->Iex.CCall.retty,
4064 e->Iex.CCall.cee );
4065
4066 case Iex_Mux0X:
4067 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
4068 e->Iex.Mux0X.exprX);
4069
4070 default:
4071 VG_(printf)("\n");
4072 ppIRExpr(e);
4073 VG_(printf)("\n");
4074 VG_(tool_panic)("memcheck: expr2vbits");
4075 }
4076 }
4077
4078 /*------------------------------------------------------------*/
4079 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
4080 /*------------------------------------------------------------*/
4081
4082 /* Widen a value to the host word size. */
4083
4084 static
4085 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4086 {
4087 IRType ty, tyH;
4088
4089    /* vatom is a vbits-value and as such can only have a shadow type. */
4090 tl_assert(isShadowAtom(mce,vatom));
4091
4092 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
4093 tyH = mce->hWordTy;
4094
4095 if (tyH == Ity_I32) {
4096 switch (ty) {
4097 case Ity_I32:
4098 return vatom;
4099 case Ity_I16:
4100 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4101 case Ity_I8:
4102 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4103 default:
4104 goto unhandled;
4105 }
4106 } else
4107 if (tyH == Ity_I64) {
4108 switch (ty) {
4109 case Ity_I32:
4110 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4111 case Ity_I16:
4112 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4113 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4114 case Ity_I8:
4115 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4116 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4117 default:
4118 goto unhandled;
4119 }
4120 } else {
4121 goto unhandled;
4122 }
4123 unhandled:
4124 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4125 VG_(tool_panic)("zwidenToHostWord");
4126 }
4127
4128
4129 /* Generate a shadow store. addr is always the original address atom.
4130 You can pass in either originals or V-bits for the data atom, but
4131 obviously not both. guard :: Ity_I1 controls whether the store
4132 really happens; NULL means it unconditionally does. Note that
4133 guard itself is not checked for definedness; the caller of this
4134 function must do that if necessary. */
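/* For example (see the CAS handlers later in this file), a caller may
   pass an already-computed shadow value plus a guard:

      do_shadow_Store( mce, cas->end, cas->addr, 0, NULL, vdataLo,
                       expd_eq_old );

   whereas a caller holding only the original data atom passes
   (data, NULL) for the (data, vdata) pair and lets this function
   derive the V bits itself. */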
4135
4136 static
4137 void do_shadow_Store ( MCEnv* mce,
4138 IREndness end,
4139 IRAtom* addr, UInt bias,
4140 IRAtom* data, IRAtom* vdata,
4141 IRAtom* guard )
4142 {
4143 IROp mkAdd;
4144 IRType ty, tyAddr;
4145 void* helper = NULL;
4146 Char* hname = NULL;
4147 IRConst* c;
4148
4149 tyAddr = mce->hWordTy;
4150 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4151 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4152 tl_assert( end == Iend_LE || end == Iend_BE );
4153
4154 if (data) {
4155 tl_assert(!vdata);
4156 tl_assert(isOriginalAtom(mce, data));
4157 tl_assert(bias == 0);
4158 vdata = expr2vbits( mce, data );
4159 } else {
4160 tl_assert(vdata);
4161 }
4162
4163 tl_assert(isOriginalAtom(mce,addr));
4164 tl_assert(isShadowAtom(mce,vdata));
4165
4166 if (guard) {
4167 tl_assert(isOriginalAtom(mce, guard));
4168 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4169 }
4170
4171 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
4172
4173 // If we're not doing undefined value checking, pretend that this value
4174 // is "all valid". That lets Vex's optimiser remove some of the V bit
4175 // shadow computation ops that precede it.
4176 if (MC_(clo_mc_level) == 1) {
4177 switch (ty) {
4178 case Ity_V256: // V256 weirdness -- used four times
4179 c = IRConst_V256(V_BITS32_DEFINED); break;
4180 case Ity_V128: // V128 weirdness -- used twice
4181 c = IRConst_V128(V_BITS16_DEFINED); break;
4182 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
4183 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
4184 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
4185 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
4186 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4187 }
4188 vdata = IRExpr_Const( c );
4189 }
4190
4191 /* First, emit a definedness test for the address. This also sets
4192 the address (shadow) to 'defined' following the test. */
4193 complainIfUndefined( mce, addr, guard );
4194
4195 /* Now decide which helper function to call to write the data V
4196 bits into shadow memory. */
4197 if (end == Iend_LE) {
4198 switch (ty) {
4199 case Ity_V256: /* we'll use the helper four times */
4200 case Ity_V128: /* we'll use the helper twice */
4201 case Ity_I64: helper = &MC_(helperc_STOREV64le);
4202 hname = "MC_(helperc_STOREV64le)";
4203 break;
4204 case Ity_I32: helper = &MC_(helperc_STOREV32le);
4205 hname = "MC_(helperc_STOREV32le)";
4206 break;
4207 case Ity_I16: helper = &MC_(helperc_STOREV16le);
4208 hname = "MC_(helperc_STOREV16le)";
4209 break;
4210 case Ity_I8: helper = &MC_(helperc_STOREV8);
4211 hname = "MC_(helperc_STOREV8)";
4212 break;
4213 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4214 }
4215 } else {
4216 switch (ty) {
4217 case Ity_V128: /* we'll use the helper twice */
4218 case Ity_I64: helper = &MC_(helperc_STOREV64be);
4219 hname = "MC_(helperc_STOREV64be)";
4220 break;
4221 case Ity_I32: helper = &MC_(helperc_STOREV32be);
4222 hname = "MC_(helperc_STOREV32be)";
4223 break;
4224 case Ity_I16: helper = &MC_(helperc_STOREV16be);
4225 hname = "MC_(helperc_STOREV16be)";
4226 break;
4227 case Ity_I8: helper = &MC_(helperc_STOREV8);
4228 hname = "MC_(helperc_STOREV8)";
4229 break;
4230            /* Note, no V256 case here, because no big-endian target that
4231               we support has 256-bit vectors. */
4232 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4233 }
4234 }
4235
4236 if (UNLIKELY(ty == Ity_V256)) {
4237
4238 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4239 Q3 being the most significant lane. */
4240 /* These are the offsets of the Qs in memory. */
4241 Int offQ0, offQ1, offQ2, offQ3;
4242
4243 /* Various bits for constructing the 4 lane helper calls */
4244 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
4245 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
4246 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4247 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4248
4249 if (end == Iend_LE) {
4250 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4251 } else {
4252 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4253 }
4254
4255 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4256 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4257 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4258 diQ0 = unsafeIRDirty_0_N(
4259 1/*regparms*/,
4260 hname, VG_(fnptr_to_fnentry)( helper ),
4261 mkIRExprVec_2( addrQ0, vdataQ0 )
4262 );
4263
4264 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4265 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4266 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4267 diQ1 = unsafeIRDirty_0_N(
4268 1/*regparms*/,
4269 hname, VG_(fnptr_to_fnentry)( helper ),
4270 mkIRExprVec_2( addrQ1, vdataQ1 )
4271 );
4272
4273 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4274 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4275 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4276 diQ2 = unsafeIRDirty_0_N(
4277 1/*regparms*/,
4278 hname, VG_(fnptr_to_fnentry)( helper ),
4279 mkIRExprVec_2( addrQ2, vdataQ2 )
4280 );
4281
4282 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4283 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4284 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4285 diQ3 = unsafeIRDirty_0_N(
4286 1/*regparms*/,
4287 hname, VG_(fnptr_to_fnentry)( helper ),
4288 mkIRExprVec_2( addrQ3, vdataQ3 )
4289 );
4290
4291 if (guard)
4292 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4293
4294 setHelperAnns( mce, diQ0 );
4295 setHelperAnns( mce, diQ1 );
4296 setHelperAnns( mce, diQ2 );
4297 setHelperAnns( mce, diQ3 );
4298 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4299 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4300 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4301 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4302
4303 }
4304 else if (UNLIKELY(ty == Ity_V128)) {
4305
4306 /* V128-bit case */
4307 /* See comment in next clause re 64-bit regparms */
4308 /* also, need to be careful about endianness */
4309
4310 Int offLo64, offHi64;
4311 IRDirty *diLo64, *diHi64;
4312 IRAtom *addrLo64, *addrHi64;
4313 IRAtom *vdataLo64, *vdataHi64;
4314 IRAtom *eBiasLo64, *eBiasHi64;
4315
4316 if (end == Iend_LE) {
4317 offLo64 = 0;
4318 offHi64 = 8;
4319 } else {
4320 offLo64 = 8;
4321 offHi64 = 0;
4322 }
4323
4324 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
4325 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4326 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
4327 diLo64 = unsafeIRDirty_0_N(
4328 1/*regparms*/,
4329 hname, VG_(fnptr_to_fnentry)( helper ),
4330 mkIRExprVec_2( addrLo64, vdataLo64 )
4331 );
4332 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
4333 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4334 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
4335 diHi64 = unsafeIRDirty_0_N(
4336 1/*regparms*/,
4337 hname, VG_(fnptr_to_fnentry)( helper ),
4338 mkIRExprVec_2( addrHi64, vdataHi64 )
4339 );
4340 if (guard) diLo64->guard = guard;
4341 if (guard) diHi64->guard = guard;
4342 setHelperAnns( mce, diLo64 );
4343 setHelperAnns( mce, diHi64 );
4344 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4345 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
4346
4347 } else {
4348
4349 IRDirty *di;
4350 IRAtom *addrAct;
4351
4352 /* 8/16/32/64-bit cases */
4353 /* Generate the actual address into addrAct. */
4354 if (bias == 0) {
4355 addrAct = addr;
4356 } else {
4357 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4358 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
4359 }
4360
4361 if (ty == Ity_I64) {
4362 /* We can't do this with regparm 2 on 32-bit platforms, since
4363 the back ends aren't clever enough to handle 64-bit
4364 regparm args. Therefore be different. */
4365 di = unsafeIRDirty_0_N(
4366 1/*regparms*/,
4367 hname, VG_(fnptr_to_fnentry)( helper ),
4368 mkIRExprVec_2( addrAct, vdata )
4369 );
4370 } else {
4371 di = unsafeIRDirty_0_N(
4372 2/*regparms*/,
4373 hname, VG_(fnptr_to_fnentry)( helper ),
4374 mkIRExprVec_2( addrAct,
4375 zwidenToHostWord( mce, vdata ))
4376 );
4377 }
4378 if (guard) di->guard = guard;
4379 setHelperAnns( mce, di );
4380 stmt( 'V', mce, IRStmt_Dirty(di) );
4381 }
4382
4383 }
4384
4385
4386 /* Do lazy pessimistic propagation through a dirty helper call, by
4387 looking at the annotations on it. This is the most complex part of
4388 Memcheck. */
4389
4390 static IRType szToITy ( Int n )
4391 {
4392 switch (n) {
4393 case 1: return Ity_I8;
4394 case 2: return Ity_I16;
4395 case 4: return Ity_I32;
4396 case 8: return Ity_I64;
4397 default: VG_(tool_panic)("szToITy(memcheck)");
4398 }
4399 }
4400
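/* The overall scheme of do_shadow_Dirty: every input to the call --
   each unmasked argument, each piece of guest state read, and each
   piece of memory read -- is PCast to Ity_I32 and UifU'd into a single
   accumulator 'curr'.  That pessimistic summary is then PCast back out
   to every output: the destination temporary, the guest state written,
   and the memory written.  Hence if any input is anywhere undefined,
   all outputs are marked entirely undefined. */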
4401 static
4402 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4403 {
4404 Int i, k, n, toDo, gSz, gOff;
4405 IRAtom *src, *here, *curr;
4406 IRType tySrc, tyDst;
4407 IRTemp dst;
4408 IREndness end;
4409
4410 /* What's the native endianness? We need to know this. */
4411 # if defined(VG_BIGENDIAN)
4412 end = Iend_BE;
4413 # elif defined(VG_LITTLEENDIAN)
4414 end = Iend_LE;
4415 # else
4416 # error "Unknown endianness"
4417 # endif
4418
4419 /* First check the guard. */
4420 complainIfUndefined(mce, d->guard, NULL);
4421
4422 /* Now round up all inputs and PCast over them. */
4423 curr = definedOfType(Ity_I32);
4424
4425 /* Inputs: unmasked args
4426 Note: arguments are evaluated REGARDLESS of the guard expression */
4427 for (i = 0; d->args[i]; i++) {
4428 if (d->cee->mcx_mask & (1<<i)) {
4429 /* ignore this arg */
4430 } else {
4431 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4432 curr = mkUifU32(mce, here, curr);
4433 }
4434 }
4435
4436 /* Inputs: guest state that we read. */
4437 for (i = 0; i < d->nFxState; i++) {
4438 tl_assert(d->fxState[i].fx != Ifx_None);
4439 if (d->fxState[i].fx == Ifx_Write)
4440 continue;
4441
4442 /* Enumerate the described state segments */
4443 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4444 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4445 gSz = d->fxState[i].size;
4446
4447 /* Ignore any sections marked as 'always defined'. */
4448 if (isAlwaysDefd(mce, gOff, gSz)) {
4449 if (0)
4450 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4451 gOff, gSz);
4452 continue;
4453 }
4454
4455 /* This state element is read or modified. So we need to
4456 consider it. If larger than 8 bytes, deal with it in
4457 8-byte chunks. */
4458 while (True) {
4459 tl_assert(gSz >= 0);
4460 if (gSz == 0) break;
4461 n = gSz <= 8 ? gSz : 8;
4462 /* update 'curr' with UifU of the state slice
4463 gOff .. gOff+n-1 */
4464 tySrc = szToITy( n );
4465
4466 /* Observe the guard expression. If it is false use an
4467 all-bits-defined bit pattern */
4468 IRAtom *cond, *iffalse, *iftrue;
4469
4470 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
4471 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
4472 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
4473 src = assignNew('V', mce, tySrc,
4474 IRExpr_Mux0X(cond, iffalse, iftrue));
4475
4476 here = mkPCastTo( mce, Ity_I32, src );
4477 curr = mkUifU32(mce, here, curr);
4478 gSz -= n;
4479 gOff += n;
4480 }
4481 }
4482 }
4483
4484 /* Inputs: memory. First set up some info needed regardless of
4485 whether we're doing reads or writes. */
4486
4487 if (d->mFx != Ifx_None) {
4488 /* Because we may do multiple shadow loads/stores from the same
4489 base address, it's best to do a single test of its
4490 definedness right now. Post-instrumentation optimisation
4491 should remove all but this test. */
4492 IRType tyAddr;
4493 tl_assert(d->mAddr);
4494 complainIfUndefined(mce, d->mAddr, d->guard);
4495
4496 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4497 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4498 tl_assert(tyAddr == mce->hWordTy); /* not really right */
4499 }
4500
4501 /* Deal with memory inputs (reads or modifies) */
4502 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4503 toDo = d->mSize;
4504 /* chew off 32-bit chunks. We don't care about the endianness
4505 since it's all going to be condensed down to a single bit,
4506 but nevertheless choose an endianness which is hopefully
4507 native to the platform. */
4508 while (toDo >= 4) {
4509 here = mkPCastTo(
4510 mce, Ity_I32,
4511 expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
4512 d->mSize - toDo, d->guard )
4513 );
4514 curr = mkUifU32(mce, here, curr);
4515 toDo -= 4;
4516 }
4517 /* chew off 16-bit chunks */
4518 while (toDo >= 2) {
4519 here = mkPCastTo(
4520 mce, Ity_I32,
4521 expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
4522 d->mSize - toDo, d->guard )
4523 );
4524 curr = mkUifU32(mce, here, curr);
4525 toDo -= 2;
4526 }
4527 /* chew off the remaining 8-bit chunk, if any */
4528 if (toDo == 1) {
4529 here = mkPCastTo(
4530 mce, Ity_I32,
4531 expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
4532 d->mSize - toDo, d->guard )
4533 );
4534 curr = mkUifU32(mce, here, curr);
4535 toDo -= 1;
4536 }
4537 tl_assert(toDo == 0);
4538 }
4539
4540 /* Whew! So curr is a 32-bit V-value summarising pessimistically
4541 all the inputs to the helper. Now we need to re-distribute the
4542 results to all destinations. */
4543
4544 /* Outputs: the destination temporary, if there is one. */
4545 if (d->tmp != IRTemp_INVALID) {
4546 dst = findShadowTmpV(mce, d->tmp);
4547 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4548 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4549 }
4550
4551 /* Outputs: guest state that we write or modify. */
4552 for (i = 0; i < d->nFxState; i++) {
4553 tl_assert(d->fxState[i].fx != Ifx_None);
4554 if (d->fxState[i].fx == Ifx_Read)
4555 continue;
4556
4557 /* Enumerate the described state segments */
4558 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4559 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4560 gSz = d->fxState[i].size;
4561
4562 /* Ignore any sections marked as 'always defined'. */
4563 if (isAlwaysDefd(mce, gOff, gSz))
4564 continue;
4565
4566 /* This state element is written or modified. So we need to
4567 consider it. If larger than 8 bytes, deal with it in
4568 8-byte chunks. */
4569 while (True) {
4570 tl_assert(gSz >= 0);
4571 if (gSz == 0) break;
4572 n = gSz <= 8 ? gSz : 8;
4573 /* Write suitably-casted 'curr' to the state slice
4574 gOff .. gOff+n-1 */
4575 tyDst = szToITy( n );
4576 do_shadow_PUT( mce, gOff,
4577 NULL, /* original atom */
4578 mkPCastTo( mce, tyDst, curr ), d->guard );
4579 gSz -= n;
4580 gOff += n;
4581 }
4582 }
4583 }
4584
4585 /* Outputs: memory that we write or modify. Same comments about
4586 endianness as above apply. */
4587 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4588 toDo = d->mSize;
4589 /* chew off 32-bit chunks */
4590 while (toDo >= 4) {
4591 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4592 NULL, /* original data */
4593 mkPCastTo( mce, Ity_I32, curr ),
4594 d->guard );
4595 toDo -= 4;
4596 }
4597 /* chew off 16-bit chunks */
4598 while (toDo >= 2) {
4599 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4600 NULL, /* original data */
4601 mkPCastTo( mce, Ity_I16, curr ),
4602 d->guard );
4603 toDo -= 2;
4604 }
4605 /* chew off the remaining 8-bit chunk, if any */
4606 if (toDo == 1) {
4607 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4608 NULL, /* original data */
4609 mkPCastTo( mce, Ity_I8, curr ),
4610 d->guard );
4611 toDo -= 1;
4612 }
4613 tl_assert(toDo == 0);
4614 }
4615
4616 }
4617
4618
4619 /* We have an ABI hint telling us that [base .. base+len-1] is to
4620 become undefined ("writable"). Generate code to call a helper to
4621 notify the A/V bit machinery of this fact.
4622
4623 We call
4624 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4625 Addr nia );
4626 */
4627 static
4628 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4629 {
4630 IRDirty* di;
4631 /* Minor optimisation: if not doing origin tracking, ignore the
4632 supplied nia and pass zero instead. This is on the basis that
4633 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4634 almost always generate a shorter instruction to put zero into a
4635 register than any other value. */
4636 if (MC_(clo_mc_level) < 3)
4637 nia = mkIRExpr_HWord(0);
4638
4639 di = unsafeIRDirty_0_N(
4640 0/*regparms*/,
4641 "MC_(helperc_MAKE_STACK_UNINIT)",
4642 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4643 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4644 );
4645 stmt( 'V', mce, IRStmt_Dirty(di) );
4646 }
4647
4648
4649 /* ------ Dealing with IRCAS (big and complex) ------ */
4650
4651 /* FWDS */
4652 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4653 IRAtom* baseaddr, Int offset );
4654 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4655 static void gen_store_b ( MCEnv* mce, Int szB,
4656 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4657 IRAtom* guard );
4658
4659 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4660 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4661
4662
4663 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4664 IRExpr.Consts, else this asserts. If they are both Consts, it
4665 doesn't do anything. So that just leaves the RdTmp case.
4666
4667 In which case: this assigns the shadow value SHADOW to the IR
4668 shadow temporary associated with ORIG. That is, ORIG, being an
4669 original temporary, will have a shadow temporary associated with
4670 it. However, in the case envisaged here, there will so far have
4671 been no IR emitted to actually write a shadow value into that
4672 temporary. What this routine does is to (emit IR to) copy the
4673 value in SHADOW into said temporary, so that after this call,
4674 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4675 value in SHADOW.
4676
4677 Point is to allow callers to compute "by hand" a shadow value for
4678 ORIG, and force it to be associated with ORIG.
4679
4680 How do we know that that shadow associated with ORIG has not so far
4681 been assigned to? Well, we don't per se know that, but supposing
4682 it had. Then this routine would create a second assignment to it,
4683 and later the IR sanity checker would barf. But that never
4684 happens. QED.
4685 */
4686 static void bind_shadow_tmp_to_orig ( UChar how,
4687 MCEnv* mce,
4688 IRAtom* orig, IRAtom* shadow )
4689 {
4690 tl_assert(isOriginalAtom(mce, orig));
4691 tl_assert(isShadowAtom(mce, shadow));
4692 switch (orig->tag) {
4693 case Iex_Const:
4694 tl_assert(shadow->tag == Iex_Const);
4695 break;
4696 case Iex_RdTmp:
4697 tl_assert(shadow->tag == Iex_RdTmp);
4698 if (how == 'V') {
4699 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4700 shadow);
4701 } else {
4702 tl_assert(how == 'B');
4703 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4704 shadow);
4705 }
4706 break;
4707 default:
4708 tl_assert(0);
4709 }
4710 }
4711
4712
4713 static
4714 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4715 {
4716 /* Scheme is (both single- and double- cases):
4717    /* Scheme is (both single- and double-CAS cases):
4718 1. fetch data#,dataB (the proposed new value)
4719
4720 2. fetch expd#,expdB (what we expect to see at the address)
4721
4722 3. check definedness of address
4723
4724 4. load old#,oldB from shadow memory; this also checks
4725          addressability of the address
4726
4727 5. the CAS itself
4728
4729 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
4730
4731 7. if "expected == old" (as computed by (6))
4732 store data#,dataB to shadow memory
4733
4734 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
4735 'data' but 7 stores 'data#'. Hence it is possible for the
4736 shadow data to be incorrectly checked and/or updated:
4737
4738 * 7 is at least gated correctly, since the 'expected == old'
4739 condition is derived from outputs of 5. However, the shadow
4740 write could happen too late: imagine after 5 we are
4741 descheduled, a different thread runs, writes a different
4742 (shadow) value at the address, and then we resume, hence
4743 overwriting the shadow value written by the other thread.
4744
4745 Because the original memory access is atomic, there's no way to
4746 make both the original and shadow accesses into a single atomic
4747 thing, hence this is unavoidable.
4748
4749 At least as Valgrind stands, I don't think it's a problem, since
4750 we're single threaded *and* we guarantee that there are no
4751 context switches during the execution of any specific superblock
4752 -- context switches can only happen at superblock boundaries.
4753
4754 If Valgrind ever becomes MT in the future, then it might be more
4755 of a problem. A possible kludge would be to artificially
4756 associate with the location, a lock, which we must acquire and
4757 release around the transaction as a whole. Hmm, that probably
4758       wouldn't work properly since it only guards us against other
4759 threads doing CASs on the same location, not against other
4760 threads doing normal reads and writes.
4761
4762 ------------------------------------------------------------
4763
4764 COMMENT_ON_CasCmpEQ:
4765
4766 Note two things. Firstly, in the sequence above, we compute
4767 "expected == old", but we don't check definedness of it. Why
4768 not? Also, the x86 and amd64 front ends use
4769       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4770 determination (expected == old ?) for themselves, and we also
4771 don't check definedness for those primops; we just say that the
4772 result is defined. Why? Details follow.
4773
4774 x86/amd64 contains various forms of locked insns:
4775       * lock prefix before all basic arithmetic insns;
4776 eg lock xorl %reg1,(%reg2)
4777 * atomic exchange reg-mem
4778 * compare-and-swaps
4779
4780 Rather than attempt to represent them all, which would be a
4781 royal PITA, I used a result from Maurice Herlihy
4782 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4783 demonstrates that compare-and-swap is a primitive more general
4784 than the other two, and so can be used to represent all of them.
4785 So the translation scheme for (eg) lock incl (%reg) is as
4786 follows:
4787
4788 again:
4789 old = * %reg
4790 new = old + 1
4791 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4792
4793 The "atomically" is the CAS bit. The scheme is always the same:
4794 get old value from memory, compute new value, atomically stuff
4795 new value back in memory iff the old value has not changed (iow,
4796 no other thread modified it in the meantime). If it has changed
4797 then we've been out-raced and we have to start over.
4798
4799 Now that's all very neat, but it has the bad side effect of
4800 introducing an explicit equality test into the translation.
4801 Consider the behaviour of said code on a memory location which
4802 is uninitialised. We will wind up doing a comparison on
4803 uninitialised data, and mc duly complains.
4804
4805 What's difficult about this is, the common case is that the
4806 location is uncontended, and so we're usually comparing the same
4807 value (* %reg) with itself. So we shouldn't complain even if it
4808 is undefined. But mc doesn't know that.
4809
4810 My solution is to mark the == in the IR specially, so as to tell
4811 mc that it almost certainly compares a value with itself, and we
4812 should just regard the result as always defined. Rather than
4813 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4814 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4815
4816 So there's always the question of, can this give a false
4817 negative? eg, imagine that initially, * %reg is defined; and we
4818 read that; but then in the gap between the read and the CAS, a
4819 different thread writes an undefined (and different) value at
4820 the location. Then the CAS in this thread will fail and we will
4821 go back to "again:", but without knowing that the trip back
4822 there was based on an undefined comparison. No matter; at least
4823 the other thread won the race and the location is correctly
4824 marked as undefined. What if it wrote an uninitialised version
4825 of the same value that was there originally, though?
4826
4827 etc etc. Seems like there's a small corner case in which we
4828 might lose the fact that something's defined -- we're out-raced
4829 in between the "old = * reg" and the "atomically {", _and_ the
4830 other thread is writing in an undefined version of what's
4831 already there. Well, that seems pretty unlikely.
4832
4833 ---
4834
4835 If we ever need to reinstate it .. code which generates a
4836 definedness test for "expected == old" was removed at r10432 of
4837 this file.
4838 */
4839 if (cas->oldHi == IRTemp_INVALID) {
4840 do_shadow_CAS_single( mce, cas );
4841 } else {
4842 do_shadow_CAS_double( mce, cas );
4843 }
4844 }
4845
4846
4847 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4848 {
4849 IRAtom *vdataLo = NULL, *bdataLo = NULL;
4850 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4851 IRAtom *voldLo = NULL, *boldLo = NULL;
4852 IRAtom *expd_eq_old = NULL;
4853 IROp opCasCmpEQ;
4854 Int elemSzB;
4855 IRType elemTy;
4856 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4857
4858 /* single CAS */
4859 tl_assert(cas->oldHi == IRTemp_INVALID);
4860 tl_assert(cas->expdHi == NULL);
4861 tl_assert(cas->dataHi == NULL);
4862
4863 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4864 switch (elemTy) {
4865 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
4866 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4867 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4868 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4869 default: tl_assert(0); /* IR defn disallows any other types */
4870 }
4871
4872 /* 1. fetch data# (the proposed new value) */
4873 tl_assert(isOriginalAtom(mce, cas->dataLo));
4874 vdataLo
4875 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4876 tl_assert(isShadowAtom(mce, vdataLo));
4877 if (otrak) {
4878 bdataLo
4879 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4880 tl_assert(isShadowAtom(mce, bdataLo));
4881 }
4882
4883 /* 2. fetch expected# (what we expect to see at the address) */
4884 tl_assert(isOriginalAtom(mce, cas->expdLo));
4885 vexpdLo
4886 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4887 tl_assert(isShadowAtom(mce, vexpdLo));
4888 if (otrak) {
4889 bexpdLo
4890 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4891 tl_assert(isShadowAtom(mce, bexpdLo));
4892 }
4893
4894 /* 3. check definedness of address */
4895 /* 4. fetch old# from shadow memory; this also checks
4896       addressability of the address */
4897 voldLo
4898 = assignNew(
4899 'V', mce, elemTy,
4900 expr2vbits_Load(
4901 mce,
4902 cas->end, elemTy, cas->addr, 0/*Addr bias*/
4903 ));
4904 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4905 if (otrak) {
4906 boldLo
4907 = assignNew('B', mce, Ity_I32,
4908 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4909 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4910 }
4911
4912 /* 5. the CAS itself */
4913 stmt( 'C', mce, IRStmt_CAS(cas) );
4914
4915 /* 6. compute "expected == old" */
4916    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
4917 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4918 tree, but it's not copied from the input block. */
4919 expd_eq_old
4920 = assignNew('C', mce, Ity_I1,
4921 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4922
4923 /* 7. if "expected == old"
4924 store data# to shadow memory */
4925 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4926 NULL/*data*/, vdataLo/*vdata*/,
4927 expd_eq_old/*guard for store*/ );
4928 if (otrak) {
4929 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4930 bdataLo/*bdata*/,
4931 expd_eq_old/*guard for store*/ );
4932 }
4933 }
4934
4935
4936 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4937 {
4938 IRAtom *vdataHi = NULL, *bdataHi = NULL;
4939 IRAtom *vdataLo = NULL, *bdataLo = NULL;
4940 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4941 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4942 IRAtom *voldHi = NULL, *boldHi = NULL;
4943 IRAtom *voldLo = NULL, *boldLo = NULL;
4944 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4945 IRAtom *expd_eq_old = NULL, *zero = NULL;
4946 IROp opCasCmpEQ, opOr, opXor;
4947 Int elemSzB, memOffsLo, memOffsHi;
4948 IRType elemTy;
4949 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4950
4951 /* double CAS */
4952 tl_assert(cas->oldHi != IRTemp_INVALID);
4953 tl_assert(cas->expdHi != NULL);
4954 tl_assert(cas->dataHi != NULL);
4955
4956 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4957 switch (elemTy) {
4958 case Ity_I8:
4959 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4960 elemSzB = 1; zero = mkU8(0);
4961 break;
4962 case Ity_I16:
4963 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4964 elemSzB = 2; zero = mkU16(0);
4965 break;
4966 case Ity_I32:
4967 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4968 elemSzB = 4; zero = mkU32(0);
4969 break;
4970 case Ity_I64:
4971 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4972 elemSzB = 8; zero = mkU64(0);
4973 break;
4974 default:
4975 tl_assert(0); /* IR defn disallows any other types */
4976 }
4977
4978 /* 1. fetch data# (the proposed new value) */
4979 tl_assert(isOriginalAtom(mce, cas->dataHi));
4980 tl_assert(isOriginalAtom(mce, cas->dataLo));
4981 vdataHi
4982 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4983 vdataLo
4984 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4985 tl_assert(isShadowAtom(mce, vdataHi));
4986 tl_assert(isShadowAtom(mce, vdataLo));
4987 if (otrak) {
4988 bdataHi
4989 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4990 bdataLo
4991 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4992 tl_assert(isShadowAtom(mce, bdataHi));
4993 tl_assert(isShadowAtom(mce, bdataLo));
4994 }
4995
4996 /* 2. fetch expected# (what we expect to see at the address) */
4997 tl_assert(isOriginalAtom(mce, cas->expdHi));
4998 tl_assert(isOriginalAtom(mce, cas->expdLo));
4999 vexpdHi
5000 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5001 vexpdLo
5002 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5003 tl_assert(isShadowAtom(mce, vexpdHi));
5004 tl_assert(isShadowAtom(mce, vexpdLo));
5005 if (otrak) {
5006 bexpdHi
5007 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5008 bexpdLo
5009 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5010 tl_assert(isShadowAtom(mce, bexpdHi));
5011 tl_assert(isShadowAtom(mce, bexpdLo));
5012 }
5013
5014 /* 3. check definedness of address */
5015 /* 4. fetch old# from shadow memory; this also checks
5016       addressability of the address */
5017 if (cas->end == Iend_LE) {
5018 memOffsLo = 0;
5019 memOffsHi = elemSzB;
5020 } else {
5021 tl_assert(cas->end == Iend_BE);
5022 memOffsLo = elemSzB;
5023 memOffsHi = 0;
5024 }
5025 voldHi
5026 = assignNew(
5027 'V', mce, elemTy,
5028 expr2vbits_Load(
5029 mce,
5030 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
5031 ));
5032 voldLo
5033 = assignNew(
5034 'V', mce, elemTy,
5035 expr2vbits_Load(
5036 mce,
5037 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
5038 ));
5039 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5040 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5041 if (otrak) {
5042 boldHi
5043 = assignNew('B', mce, Ity_I32,
5044 gen_load_b(mce, elemSzB, cas->addr,
5045 memOffsHi/*addr bias*/));
5046 boldLo
5047 = assignNew('B', mce, Ity_I32,
5048 gen_load_b(mce, elemSzB, cas->addr,
5049 memOffsLo/*addr bias*/));
5050 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5051 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5052 }
5053
5054 /* 5. the CAS itself */
5055 stmt( 'C', mce, IRStmt_CAS(cas) );
5056
5057 /* 6. compute "expected == old" */
5058    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5059 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5060 tree, but it's not copied from the input block. */
5061 /*
5062 xHi = oldHi ^ expdHi;
5063 xLo = oldLo ^ expdLo;
5064 xHL = xHi | xLo;
5065 expd_eq_old = xHL == 0;
5066 */
5067 xHi = assignNew('C', mce, elemTy,
5068 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5069 xLo = assignNew('C', mce, elemTy,
5070 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5071 xHL = assignNew('C', mce, elemTy,
5072 binop(opOr, xHi, xLo));
5073 expd_eq_old
5074 = assignNew('C', mce, Ity_I1,
5075 binop(opCasCmpEQ, xHL, zero));
5076
5077 /* 7. if "expected == old"
5078 store data# to shadow memory */
5079 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5080 NULL/*data*/, vdataHi/*vdata*/,
5081 expd_eq_old/*guard for store*/ );
5082 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5083 NULL/*data*/, vdataLo/*vdata*/,
5084 expd_eq_old/*guard for store*/ );
5085 if (otrak) {
5086 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5087 bdataHi/*bdata*/,
5088 expd_eq_old/*guard for store*/ );
5089 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5090 bdataLo/*bdata*/,
5091 expd_eq_old/*guard for store*/ );
5092 }
5093 }
5094
5095
5096 /* ------ Dealing with LL/SC (not difficult) ------ */
5097
5098 static void do_shadow_LLSC ( MCEnv*    mce,
5099 IREndness stEnd,
5100 IRTemp stResult,
5101 IRExpr* stAddr,
5102 IRExpr* stStoredata )
5103 {
5104 /* In short: treat a load-linked like a normal load followed by an
5105 assignment of the loaded (shadow) data to the result temporary.
5106 Treat a store-conditional like a normal store, and mark the
5107 result temporary as defined. */
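   /* Illustrative sketch only, not verbatim IR: for a 32-bit little-endian
      LL/SC pair (temp names t1/t2 are hypothetical), the instrumentation
      produced below is roughly

         t1 = LDle-Linked:I32(a)       ==>  t1# = <V bits loaded from the
                                                   shadow of a>
         t2 = ( STle-Cond(a) = data )  ==>  <V bits of data stored to the
                                             shadow of a; definedness and
                                             addressability of a checked>
                                            t2# = <all bits defined>
   */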
5108 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5109 IRTemp resTmp = findShadowTmpV(mce, stResult);
5110
5111 tl_assert(isIRAtom(stAddr));
5112 if (stStoredata)
5113 tl_assert(isIRAtom(stStoredata));
5114
5115 if (stStoredata == NULL) {
5116 /* Load Linked */
5117 /* Just treat this as a normal load, followed by an assignment of
5118 the value to .result. */
5119 /* Stay sane */
5120 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5121 || resTy == Ity_I16 || resTy == Ity_I8);
5122 assign( 'V', mce, resTmp,
5123 expr2vbits_Load(
5124 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
5125 } else {
5126 /* Store Conditional */
5127 /* Stay sane */
5128 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5129 stStoredata);
5130 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5131 || dataTy == Ity_I16 || dataTy == Ity_I8);
5132 do_shadow_Store( mce, stEnd,
5133 stAddr, 0/* addr bias */,
5134 stStoredata,
5135 NULL /* shadow data */,
5136 NULL/*guard*/ );
5137 /* This is a store conditional, so it writes to .result a value
5138 indicating whether or not the store succeeded. Just claim
5139 this value is always defined. In the PowerPC interpretation
5140 of store-conditional, definedness of the success indication
5141 depends on whether the address of the store matches the
5142 reservation address. But we can't tell that here (and
5143 anyway, we're not being PowerPC-specific). At least we are
5144 guaranteed that the definedness of the store address, and its
5145         addressability, will be checked as per normal.  So it seems
5146 pretty safe to just say that the success indication is always
5147 defined.
5148
5149 In schemeS, for origin tracking, we must correspondingly set
5150 a no-origin value for the origin shadow of .result.
5151 */
5152 tl_assert(resTy == Ity_I1);
5153 assign( 'V', mce, resTmp, definedOfType(resTy) );
5154 }
5155 }
5156
5157
5158 /*------------------------------------------------------------*/
5159 /*--- Memcheck main ---*/
5160 /*------------------------------------------------------------*/
5161
5162 static void schemeS ( MCEnv* mce, IRStmt* st );
5163
5164 static Bool isBogusAtom ( IRAtom* at )
5165 {
5166 ULong n = 0;
5167 IRConst* con;
5168 tl_assert(isIRAtom(at));
5169 if (at->tag == Iex_RdTmp)
5170 return False;
5171 tl_assert(at->tag == Iex_Const);
5172 con = at->Iex.Const.con;
5173 switch (con->tag) {
5174 case Ico_U1: return False;
5175 case Ico_U8: n = (ULong)con->Ico.U8; break;
5176 case Ico_U16: n = (ULong)con->Ico.U16; break;
5177 case Ico_U32: n = (ULong)con->Ico.U32; break;
5178 case Ico_U64: n = (ULong)con->Ico.U64; break;
5179 case Ico_F64: return False;
5180 case Ico_F32i: return False;
5181 case Ico_F64i: return False;
5182 case Ico_V128: return False;
5183 default: ppIRExpr(at); tl_assert(0);
5184 }
5185 /* VG_(printf)("%llx\n", n); */
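   /* The literals tested below appear to be the magic constants used by
      word-at-a-time zero-byte-detection tricks in optimised string code,
      e.g. "(v - 0x01010101) & ~v & 0x80808080" (note 0xFEFEFEFF is
      -0x01010101 in 32 bits).  Such code deliberately reads whole words
      that are only partially defined, so spotting these literals is used,
      via mce.bogusLiterals, as a hint to switch to the more expensive,
      exact interpretations elsewhere in this file. */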
5186 return (/*32*/ n == 0xFEFEFEFFULL
5187 /*32*/ || n == 0x80808080ULL
5188 /*32*/ || n == 0x7F7F7F7FULL
5189 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
5190 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
5191 /*64*/ || n == 0x0000000000008080ULL
5192 /*64*/ || n == 0x8080808080808080ULL
5193 /*64*/ || n == 0x0101010101010101ULL
5194 );
5195 }
5196
5197 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5198 {
5199 Int i;
5200 IRExpr* e;
5201 IRDirty* d;
5202 IRCAS* cas;
5203 switch (st->tag) {
5204 case Ist_WrTmp:
5205 e = st->Ist.WrTmp.data;
5206 switch (e->tag) {
5207 case Iex_Get:
5208 case Iex_RdTmp:
5209 return False;
5210 case Iex_Const:
5211 return isBogusAtom(e);
5212 case Iex_Unop:
5213 return isBogusAtom(e->Iex.Unop.arg);
5214 case Iex_GetI:
5215 return isBogusAtom(e->Iex.GetI.ix);
5216 case Iex_Binop:
5217 return isBogusAtom(e->Iex.Binop.arg1)
5218 || isBogusAtom(e->Iex.Binop.arg2);
5219 case Iex_Triop:
5220 return isBogusAtom(e->Iex.Triop.details->arg1)
5221 || isBogusAtom(e->Iex.Triop.details->arg2)
5222 || isBogusAtom(e->Iex.Triop.details->arg3);
5223 case Iex_Qop:
5224 return isBogusAtom(e->Iex.Qop.details->arg1)
5225 || isBogusAtom(e->Iex.Qop.details->arg2)
5226 || isBogusAtom(e->Iex.Qop.details->arg3)
5227 || isBogusAtom(e->Iex.Qop.details->arg4);
5228 case Iex_Mux0X:
5229 return isBogusAtom(e->Iex.Mux0X.cond)
5230 || isBogusAtom(e->Iex.Mux0X.expr0)
5231 || isBogusAtom(e->Iex.Mux0X.exprX);
5232 case Iex_Load:
5233 return isBogusAtom(e->Iex.Load.addr);
5234 case Iex_CCall:
5235 for (i = 0; e->Iex.CCall.args[i]; i++)
5236 if (isBogusAtom(e->Iex.CCall.args[i]))
5237 return True;
5238 return False;
5239 default:
5240 goto unhandled;
5241 }
5242 case Ist_Dirty:
5243 d = st->Ist.Dirty.details;
5244 for (i = 0; d->args[i]; i++)
5245 if (isBogusAtom(d->args[i]))
5246 return True;
5247 if (d->guard && isBogusAtom(d->guard))
5248 return True;
5249 if (d->mAddr && isBogusAtom(d->mAddr))
5250 return True;
5251 return False;
5252 case Ist_Put:
5253 return isBogusAtom(st->Ist.Put.data);
5254 case Ist_PutI:
5255 return isBogusAtom(st->Ist.PutI.details->ix)
5256 || isBogusAtom(st->Ist.PutI.details->data);
5257 case Ist_Store:
5258 return isBogusAtom(st->Ist.Store.addr)
5259 || isBogusAtom(st->Ist.Store.data);
5260 case Ist_Exit:
5261 return isBogusAtom(st->Ist.Exit.guard);
5262 case Ist_AbiHint:
5263 return isBogusAtom(st->Ist.AbiHint.base)
5264 || isBogusAtom(st->Ist.AbiHint.nia);
5265 case Ist_NoOp:
5266 case Ist_IMark:
5267 case Ist_MBE:
5268 return False;
5269 case Ist_CAS:
5270 cas = st->Ist.CAS.details;
5271 return isBogusAtom(cas->addr)
5272 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5273 || isBogusAtom(cas->expdLo)
5274 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5275 || isBogusAtom(cas->dataLo);
5276 case Ist_LLSC:
5277 return isBogusAtom(st->Ist.LLSC.addr)
5278 || (st->Ist.LLSC.storedata
5279 ? isBogusAtom(st->Ist.LLSC.storedata)
5280 : False);
5281 default:
5282 unhandled:
5283 ppIRStmt(st);
5284 VG_(tool_panic)("hasBogusLiterals");
5285 }
5286 }
5287
5288
5289 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
5290 IRSB* sb_in,
5291 VexGuestLayout* layout,
5292 VexGuestExtents* vge,
5293 IRType gWordTy, IRType hWordTy )
5294 {
5295 Bool verboze = 0||False;
5296 Bool bogus;
5297 Int i, j, first_stmt;
5298 IRStmt* st;
5299 MCEnv mce;
5300 IRSB* sb_out;
5301
5302 if (gWordTy != hWordTy) {
5303 /* We don't currently support this case. */
5304 VG_(tool_panic)("host/guest word size mismatch");
5305 }
5306
5307 /* Check we're not completely nuts */
5308 tl_assert(sizeof(UWord) == sizeof(void*));
5309 tl_assert(sizeof(Word) == sizeof(void*));
5310 tl_assert(sizeof(Addr) == sizeof(void*));
5311 tl_assert(sizeof(ULong) == 8);
5312 tl_assert(sizeof(Long) == 8);
5313 tl_assert(sizeof(Addr64) == 8);
5314 tl_assert(sizeof(UInt) == 4);
5315 tl_assert(sizeof(Int) == 4);
5316
5317 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
5318
5319 /* Set up SB */
5320 sb_out = deepCopyIRSBExceptStmts(sb_in);
5321
5322 /* Set up the running environment. Both .sb and .tmpMap are
5323 modified as we go along. Note that tmps are added to both
5324 .sb->tyenv and .tmpMap together, so the valid index-set for
5325 those two arrays should always be identical. */
5326 VG_(memset)(&mce, 0, sizeof(mce));
5327 mce.sb = sb_out;
5328 mce.trace = verboze;
5329 mce.layout = layout;
5330 mce.hWordTy = hWordTy;
5331 mce.bogusLiterals = False;
5332
5333 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5334 Darwin. 10.7 is mostly built with LLVM, which uses these for
5335 bitfield inserts, and we get a lot of false errors if the cheap
5336 interpretation is used, alas. Could solve this much better if
5337 we knew which of such adds came from x86/amd64 LEA instructions,
5338 since these are the only ones really needing the expensive
5339 interpretation, but that would require some way to tag them in
5340 the _toIR.c front ends, which is a lot of faffing around. So
5341 for now just use the slow and blunt-instrument solution. */
5342 mce.useLLVMworkarounds = False;
5343 # if defined(VGO_darwin)
5344 mce.useLLVMworkarounds = True;
5345 # endif
5346
5347 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5348 sizeof(TempMapEnt));
5349 for (i = 0; i < sb_in->tyenv->types_used; i++) {
5350 TempMapEnt ent;
5351 ent.kind = Orig;
5352 ent.shadowV = IRTemp_INVALID;
5353 ent.shadowB = IRTemp_INVALID;
5354 VG_(addToXA)( mce.tmpMap, &ent );
5355 }
5356 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
5357
5358 /* Make a preliminary inspection of the statements, to see if there
5359 are any dodgy-looking literals. If there are, we generate
5360 extra-detailed (hence extra-expensive) instrumentation in
5361      places.  Scan the whole bb even if dodginess is found earlier,
5362 so that the flatness assertion is applied to all stmts. */
5363
5364 bogus = False;
5365
5366 for (i = 0; i < sb_in->stmts_used; i++) {
5367
5368 st = sb_in->stmts[i];
5369 tl_assert(st);
5370 tl_assert(isFlatIRStmt(st));
5371
5372 if (!bogus) {
5373 bogus = checkForBogusLiterals(st);
5374 if (0 && bogus) {
5375 VG_(printf)("bogus: ");
5376 ppIRStmt(st);
5377 VG_(printf)("\n");
5378 }
5379 }
5380
5381 }
5382
5383 mce.bogusLiterals = bogus;
5384
5385 /* Copy verbatim any IR preamble preceding the first IMark */
5386
5387 tl_assert(mce.sb == sb_out);
5388 tl_assert(mce.sb != sb_in);
5389
5390 i = 0;
5391 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
5392
5393 st = sb_in->stmts[i];
5394 tl_assert(st);
5395 tl_assert(isFlatIRStmt(st));
5396
5397 stmt( 'C', &mce, sb_in->stmts[i] );
5398 i++;
5399 }
5400
5401 /* Nasty problem. IR optimisation of the pre-instrumented IR may
5402 cause the IR following the preamble to contain references to IR
5403 temporaries defined in the preamble. Because the preamble isn't
5404 instrumented, these temporaries don't have any shadows.
5405 Nevertheless uses of them following the preamble will cause
5406 memcheck to generate references to their shadows. End effect is
5407 to cause IR sanity check failures, due to references to
5408 non-existent shadows. This is only evident for the complex
5409 preambles used for function wrapping on TOC-afflicted platforms
5410 (ppc64-linux).
5411
5412 The following loop therefore scans the preamble looking for
5413 assignments to temporaries. For each one found it creates an
5414 assignment to the corresponding (V) shadow temp, marking it as
5415 'defined'. This is the same resulting IR as if the main
5416 instrumentation loop before had been applied to the statement
5417 'tmp = CONSTANT'.
5418
5419 Similarly, if origin tracking is enabled, we must generate an
5420 assignment for the corresponding origin (B) shadow, claiming
5421 no-origin, as appropriate for a defined value.
5422 */
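   /* Illustrative sketch only, not verbatim IR: for a hypothetical
      preamble statement "t7 = GET:I64(...)", the loop below emits roughly

         t7#(V) = <I64 constant meaning "completely defined">
         t7#(B) = 0x0   (no origin; only when --track-origins=yes,
                         i.e. MC_(clo_mc_level) == 3)
   */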
5423 for (j = 0; j < i; j++) {
5424 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
5425 /* findShadowTmpV checks its arg is an original tmp;
5426 no need to assert that here. */
5427 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
5428 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
5429 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
5430 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5431 if (MC_(clo_mc_level) == 3) {
5432 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
5433 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
5434 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5435 }
5436 if (0) {
5437 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5438 ppIRType( ty_v );
5439 VG_(printf)("\n");
5440 }
5441 }
5442 }
5443
5444 /* Iterate over the remaining stmts to generate instrumentation. */
5445
5446 tl_assert(sb_in->stmts_used > 0);
5447 tl_assert(i >= 0);
5448 tl_assert(i < sb_in->stmts_used);
5449 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5450
5451 for (/* use current i*/; i < sb_in->stmts_used; i++) {
5452
5453 st = sb_in->stmts[i];
5454 first_stmt = sb_out->stmts_used;
5455
5456 if (verboze) {
5457 VG_(printf)("\n");
5458 ppIRStmt(st);
5459 VG_(printf)("\n");
5460 }
5461
5462 if (MC_(clo_mc_level) == 3) {
5463 /* See comments on case Ist_CAS below. */
5464 if (st->tag != Ist_CAS)
5465 schemeS( &mce, st );
5466 }
5467
5468 /* Generate instrumentation code for each stmt ... */
5469
5470 switch (st->tag) {
5471
5472 case Ist_WrTmp:
5473 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5474 expr2vbits( &mce, st->Ist.WrTmp.data) );
5475 break;
5476
5477 case Ist_Put:
5478 do_shadow_PUT( &mce,
5479 st->Ist.Put.offset,
5480 st->Ist.Put.data,
5481 NULL /* shadow atom */, NULL /* guard */ );
5482 break;
5483
5484 case Ist_PutI:
5485 do_shadow_PUTI( &mce, st->Ist.PutI.details);
5486 break;
5487
5488 case Ist_Store:
5489 do_shadow_Store( &mce, st->Ist.Store.end,
5490 st->Ist.Store.addr, 0/* addr bias */,
5491 st->Ist.Store.data,
5492 NULL /* shadow data */,
5493 NULL/*guard*/ );
5494 break;
5495
5496 case Ist_Exit:
5497 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
5498 break;
5499
5500 case Ist_IMark:
5501 break;
5502
5503 case Ist_NoOp:
5504 case Ist_MBE:
5505 break;
5506
5507 case Ist_Dirty:
5508 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5509 break;
5510
5511 case Ist_AbiHint:
5512 do_AbiHint( &mce, st->Ist.AbiHint.base,
5513 st->Ist.AbiHint.len,
5514 st->Ist.AbiHint.nia );
5515 break;
5516
5517 case Ist_CAS:
5518 do_shadow_CAS( &mce, st->Ist.CAS.details );
5519 /* Note, do_shadow_CAS copies the CAS itself to the output
5520 block, because it needs to add instrumentation both
5521 before and after it. Hence skip the copy below. Also
5522 skip the origin-tracking stuff (call to schemeS) above,
5523 since that's all tangled up with it too; do_shadow_CAS
5524 does it all. */
5525 break;
5526
5527 case Ist_LLSC:
5528 do_shadow_LLSC( &mce,
5529 st->Ist.LLSC.end,
5530 st->Ist.LLSC.result,
5531 st->Ist.LLSC.addr,
5532 st->Ist.LLSC.storedata );
5533 break;
5534
5535 default:
5536 VG_(printf)("\n");
5537 ppIRStmt(st);
5538 VG_(printf)("\n");
5539 VG_(tool_panic)("memcheck: unhandled IRStmt");
5540
5541 } /* switch (st->tag) */
5542
5543 if (0 && verboze) {
5544 for (j = first_stmt; j < sb_out->stmts_used; j++) {
5545 VG_(printf)(" ");
5546 ppIRStmt(sb_out->stmts[j]);
5547 VG_(printf)("\n");
5548 }
5549 VG_(printf)("\n");
5550 }
5551
5552 /* ... and finally copy the stmt itself to the output. Except,
5553 skip the copy of IRCASs; see comments on case Ist_CAS
5554 above. */
5555 if (st->tag != Ist_CAS)
5556 stmt('C', &mce, st);
5557 }
5558
5559 /* Now we need to complain if the jump target is undefined. */
5560 first_stmt = sb_out->stmts_used;
5561
5562 if (verboze) {
5563 VG_(printf)("sb_in->next = ");
5564 ppIRExpr(sb_in->next);
5565 VG_(printf)("\n\n");
5566 }
5567
5568 complainIfUndefined( &mce, sb_in->next, NULL );
5569
5570 if (0 && verboze) {
5571 for (j = first_stmt; j < sb_out->stmts_used; j++) {
5572 VG_(printf)(" ");
5573 ppIRStmt(sb_out->stmts[j]);
5574 VG_(printf)("\n");
5575 }
5576 VG_(printf)("\n");
5577 }
5578
5579    /* If this fails, there's been some serious snafu with tmp management
5580 that should be investigated. */
5581 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5582 VG_(deleteXA)( mce.tmpMap );
5583
5584 tl_assert(mce.sb == sb_out);
5585 return sb_out;
5586 }
5587
5588 /*------------------------------------------------------------*/
5589 /*--- Post-tree-build final tidying ---*/
5590 /*------------------------------------------------------------*/
5591
5592 /* This exploits the observation that Memcheck often produces
5593 repeated conditional calls of the form
5594
5595 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5596
5597 with the same guard expression G guarding the same helper call.
5598 The second and subsequent calls are redundant. This usually
5599 results from instrumentation of guest code containing multiple
5600 memory references at different constant offsets from the same base
5601 register. After optimisation of the instrumentation, you get a
5602 test for the definedness of the base register for each memory
5603 reference, which is kinda pointless. MC_(final_tidy) therefore
5604 looks for such repeated calls and removes all but the first. */
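/* Illustrative sketch only, not verbatim IR, of the redundancy removed
   (t9 is a hypothetical guard temp):

      DIRTY t9 :: MC_(helperc_value_check4_fail_no_o)()   <- kept
      ... access at base+0 ...
      DIRTY t9 :: MC_(helperc_value_check4_fail_no_o)()   <- same helper,
      ... access at base+8 ...                                same guard t9:
                                                              becomes a NoOp
*/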
5605
5606 /* A struct for recording which (helper, guard) pairs we have already
5607 seen. */
5608 typedef
5609 struct { void* entry; IRExpr* guard; }
5610 Pair;
5611
5612 /* Return True if e1 and e2 definitely denote the same value (used to
5613 compare guards). Return False if unknown; False is the safe
5614 answer. Since guest registers and guest memory do not have the
5615 SSA property we must return False if any Gets or Loads appear in
5616 the expression. */
5617
5618 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5619 {
5620 if (e1->tag != e2->tag)
5621 return False;
5622 switch (e1->tag) {
5623 case Iex_Const:
5624 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5625 case Iex_Binop:
5626 return e1->Iex.Binop.op == e2->Iex.Binop.op
5627 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5628 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5629 case Iex_Unop:
5630 return e1->Iex.Unop.op == e2->Iex.Unop.op
5631 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5632 case Iex_RdTmp:
5633 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5634 case Iex_Mux0X:
5635 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5636 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5637 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5638 case Iex_Qop:
5639 case Iex_Triop:
5640 case Iex_CCall:
5641 /* be lazy. Could define equality for these, but they never
5642 appear to be used. */
5643 return False;
5644 case Iex_Get:
5645 case Iex_GetI:
5646 case Iex_Load:
5647 /* be conservative - these may not give the same value each
5648 time */
5649 return False;
5650 case Iex_Binder:
5651 /* should never see this */
5652 /* fallthrough */
5653 default:
5654 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5655 ppIRExpr(e1);
5656 VG_(tool_panic)("memcheck:sameIRValue");
5657 return False;
5658 }
5659 }
5660
5661 /* See if 'pairs' already has an entry for (entry, guard). Return
5662 True if so. If not, add an entry. */
5663
5664 static
5665 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5666 {
5667 Pair p;
5668 Pair* pp;
5669 Int i, n = VG_(sizeXA)( pairs );
5670 for (i = 0; i < n; i++) {
5671 pp = VG_(indexXA)( pairs, i );
5672 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5673 return True;
5674 }
5675 p.guard = guard;
5676 p.entry = entry;
5677 VG_(addToXA)( pairs, &p );
5678 return False;
5679 }
5680
5681 static Bool is_helperc_value_checkN_fail ( HChar* name )
5682 {
5683 return
5684 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5685 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5686 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5687 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5688 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5689 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5690 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5691 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5692 }
5693
5694 IRSB* MC_(final_tidy) ( IRSB* sb_in )
5695 {
5696 Int i;
5697 IRStmt* st;
5698 IRDirty* di;
5699 IRExpr* guard;
5700 IRCallee* cee;
5701 Bool alreadyPresent;
5702 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5703 VG_(free), sizeof(Pair) );
5704 /* Scan forwards through the statements. Each time a call to one
5705 of the relevant helpers is seen, check if we have made a
5706 previous call to the same helper using the same guard
5707 expression, and if so, delete the call. */
5708 for (i = 0; i < sb_in->stmts_used; i++) {
5709 st = sb_in->stmts[i];
5710 tl_assert(st);
5711 if (st->tag != Ist_Dirty)
5712 continue;
5713 di = st->Ist.Dirty.details;
5714 guard = di->guard;
5715 if (!guard)
5716 continue;
5717 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5718 cee = di->cee;
5719 if (!is_helperc_value_checkN_fail( cee->name ))
5720 continue;
5721 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5722 guard 'guard'. Check if we have already seen a call to this
5723 function with the same guard. If so, delete it. If not,
5724 add it to the set of calls we do know about. */
5725 alreadyPresent = check_or_add( pairs, guard, cee->addr );
5726 if (alreadyPresent) {
5727 sb_in->stmts[i] = IRStmt_NoOp();
5728 if (0) VG_(printf)("XX\n");
5729 }
5730 }
5731 VG_(deleteXA)( pairs );
5732 return sb_in;
5733 }
5734
5735
5736 /*------------------------------------------------------------*/
5737 /*--- Origin tracking stuff ---*/
5738 /*------------------------------------------------------------*/
5739
5740 /* Almost identical to findShadowTmpV. */
5741 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5742 {
5743 TempMapEnt* ent;
5744 /* VG_(indexXA) range-checks 'orig', hence no need to check
5745 here. */
5746 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5747 tl_assert(ent->kind == Orig);
5748 if (ent->shadowB == IRTemp_INVALID) {
5749 IRTemp tmpB
5750 = newTemp( mce, Ity_I32, BSh );
5751 /* newTemp may cause mce->tmpMap to resize, hence previous results
5752 from VG_(indexXA) are invalid. */
5753 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5754 tl_assert(ent->kind == Orig);
5755 tl_assert(ent->shadowB == IRTemp_INVALID);
5756 ent->shadowB = tmpB;
5757 }
5758 return ent->shadowB;
5759 }
5760
5761 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5762 {
5763 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5764 }
5765
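/* Generate IR to fetch the origin tag (otag) covering szB bytes at
   baseaddr+offset, via a dirty call to the matching MC_(helperc_b_loadN)
   helper.  The result is always presented as an Ity_I32 value (narrowed
   from I64 on 64-bit hosts). */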
5766 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5767 IRAtom* baseaddr, Int offset )
5768 {
5769 void* hFun;
5770 HChar* hName;
5771 IRTemp bTmp;
5772 IRDirty* di;
5773 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5774 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5775 IRAtom* ea = baseaddr;
5776 if (offset != 0) {
5777 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5778 : mkU64( (Long)(Int)offset );
5779 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5780 }
5781 bTmp = newTemp(mce, mce->hWordTy, BSh);
5782
5783 switch (szB) {
5784 case 1: hFun = (void*)&MC_(helperc_b_load1);
5785 hName = "MC_(helperc_b_load1)";
5786 break;
5787 case 2: hFun = (void*)&MC_(helperc_b_load2);
5788 hName = "MC_(helperc_b_load2)";
5789 break;
5790 case 4: hFun = (void*)&MC_(helperc_b_load4);
5791 hName = "MC_(helperc_b_load4)";
5792 break;
5793 case 8: hFun = (void*)&MC_(helperc_b_load8);
5794 hName = "MC_(helperc_b_load8)";
5795 break;
5796 case 16: hFun = (void*)&MC_(helperc_b_load16);
5797 hName = "MC_(helperc_b_load16)";
5798 break;
5799 case 32: hFun = (void*)&MC_(helperc_b_load32);
5800 hName = "MC_(helperc_b_load32)";
5801 break;
5802 default:
5803 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5804 tl_assert(0);
5805 }
5806 di = unsafeIRDirty_1_N(
5807 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5808 mkIRExprVec_1( ea )
5809 );
5810 /* no need to mess with any annotations. This call accesses
5811 neither guest state nor guest memory. */
5812 stmt( 'B', mce, IRStmt_Dirty(di) );
5813 if (mce->hWordTy == Ity_I64) {
5814 /* 64-bit host */
5815 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5816 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5817 return mkexpr(bTmp32);
5818 } else {
5819 /* 32-bit host */
5820 return mkexpr(bTmp);
5821 }
5822 }
5823
5824 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5825 Int offset, IRAtom* guard )
5826 {
5827 if (guard) {
5828 IRAtom *cond, *iffalse, *iftrue;
5829
5830 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5831 iftrue = assignNew('B', mce, Ity_I32,
5832 gen_load_b(mce, szB, baseaddr, offset));
5833 iffalse = mkU32(0);
5834
5835 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5836 }
5837
5838 return gen_load_b(mce, szB, baseaddr, offset);
5839 }
5840
5841 /* Generate a shadow store. guard :: Ity_I1 controls whether the
5842 store really happens; NULL means it unconditionally does. */
5843 static void gen_store_b ( MCEnv* mce, Int szB,
5844 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5845 IRAtom* guard )
5846 {
5847 void* hFun;
5848 HChar* hName;
5849 IRDirty* di;
5850 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5851 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5852 IRAtom* ea = baseaddr;
5853 if (guard) {
5854 tl_assert(isOriginalAtom(mce, guard));
5855 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5856 }
5857 if (offset != 0) {
5858 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5859 : mkU64( (Long)(Int)offset );
5860 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5861 }
5862 if (mce->hWordTy == Ity_I64)
5863 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5864
5865 switch (szB) {
5866 case 1: hFun = (void*)&MC_(helperc_b_store1);
5867 hName = "MC_(helperc_b_store1)";
5868 break;
5869 case 2: hFun = (void*)&MC_(helperc_b_store2);
5870 hName = "MC_(helperc_b_store2)";
5871 break;
5872 case 4: hFun = (void*)&MC_(helperc_b_store4);
5873 hName = "MC_(helperc_b_store4)";
5874 break;
5875 case 8: hFun = (void*)&MC_(helperc_b_store8);
5876 hName = "MC_(helperc_b_store8)";
5877 break;
5878 case 16: hFun = (void*)&MC_(helperc_b_store16);
5879 hName = "MC_(helperc_b_store16)";
5880 break;
5881 case 32: hFun = (void*)&MC_(helperc_b_store32);
5882 hName = "MC_(helperc_b_store32)";
5883 break;
5884 default:
5885 tl_assert(0);
5886 }
5887 di = unsafeIRDirty_0_N( 2/*regparms*/,
5888 hName, VG_(fnptr_to_fnentry)( hFun ),
5889 mkIRExprVec_2( ea, dataB )
5890 );
5891 /* no need to mess with any annotations. This call accesses
5892 neither guest state nor guest memory. */
5893 if (guard) di->guard = guard;
5894 stmt( 'B', mce, IRStmt_Dirty(di) );
5895 }
5896
5897 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5898 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5899 if (eTy == Ity_I64)
5900 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5901 if (eTy == Ity_I32)
5902 return e;
5903 tl_assert(0);
5904 }
5905
5906 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5907 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5908 tl_assert(eTy == Ity_I32);
5909 if (dstTy == Ity_I64)
5910 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5911 tl_assert(0);
5912 }
5913
5914
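/* Compute the B- (origin-) shadow value for expression 'e': a 32-bit
   origin tag which is, roughly, the maximum (via gen_maxU32) of the
   origins of e's inputs, with zero used wherever no useful origin can be
   produced (constants, unshadowable state, CasCmpEQ/NE results, etc). */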
5915 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5916 {
5917 tl_assert(MC_(clo_mc_level) == 3);
5918
5919 switch (e->tag) {
5920
5921 case Iex_GetI: {
5922 IRRegArray* descr_b;
5923 IRAtom *t1, *t2, *t3, *t4;
5924 IRRegArray* descr = e->Iex.GetI.descr;
5925 IRType equivIntTy
5926 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5927 /* If this array is unshadowable for whatever reason, use the
5928 usual approximation. */
5929 if (equivIntTy == Ity_INVALID)
5930 return mkU32(0);
5931 tl_assert(sizeofIRType(equivIntTy) >= 4);
5932 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5933 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5934 equivIntTy, descr->nElems );
5935 /* Do a shadow indexed get of the same size, giving t1. Take
5936 the bottom 32 bits of it, giving t2. Compute into t3 the
5937 origin for the index (almost certainly zero, but there's
5938 no harm in being completely general here, since iropt will
5939 remove any useless code), and fold it in, giving a final
5940 value t4. */
5941 t1 = assignNew( 'B', mce, equivIntTy,
5942 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5943 e->Iex.GetI.bias ));
5944 t2 = narrowTo32( mce, t1 );
5945 t3 = schemeE( mce, e->Iex.GetI.ix );
5946 t4 = gen_maxU32( mce, t2, t3 );
5947 return t4;
5948 }
5949 case Iex_CCall: {
5950 Int i;
5951 IRAtom* here;
5952 IRExpr** args = e->Iex.CCall.args;
5953 IRAtom* curr = mkU32(0);
5954 for (i = 0; args[i]; i++) {
5955 tl_assert(i < 32);
5956 tl_assert(isOriginalAtom(mce, args[i]));
5957 /* Only take notice of this arg if the callee's
5958 mc-exclusion mask does not say it is to be excluded. */
5959 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5960 /* the arg is to be excluded from definedness checking.
5961 Do nothing. */
5962 if (0) VG_(printf)("excluding %s(%d)\n",
5963 e->Iex.CCall.cee->name, i);
5964 } else {
5965 /* calculate the arg's definedness, and pessimistically
5966 merge it in. */
5967 here = schemeE( mce, args[i] );
5968 curr = gen_maxU32( mce, curr, here );
5969 }
5970 }
5971 return curr;
5972 }
5973 case Iex_Load: {
5974 Int dszB;
5975 dszB = sizeofIRType(e->Iex.Load.ty);
5976 /* assert that the B value for the address is already
5977 available (somewhere) */
5978 tl_assert(isIRAtom(e->Iex.Load.addr));
5979 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5980 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5981 }
5982 case Iex_Mux0X: {
5983 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5984 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5985 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5986 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5987 }
5988 case Iex_Qop: {
5989 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
5990 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
5991 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
5992 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
5993 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5994 gen_maxU32( mce, b3, b4 ) );
5995 }
5996 case Iex_Triop: {
5997 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
5998 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
5999 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
6000 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6001 }
6002 case Iex_Binop: {
6003 switch (e->Iex.Binop.op) {
6004 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
6005 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6006 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6007 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6008 /* Just say these all produce a defined result,
6009 regardless of their arguments. See
6010 COMMENT_ON_CasCmpEQ in this file. */
6011 return mkU32(0);
6012 default: {
6013 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6014 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6015 return gen_maxU32( mce, b1, b2 );
6016 }
6017 }
6018 tl_assert(0);
6019 /*NOTREACHED*/
6020 }
6021 case Iex_Unop: {
6022 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6023 return b1;
6024 }
6025 case Iex_Const:
6026 return mkU32(0);
6027 case Iex_RdTmp:
6028 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6029 case Iex_Get: {
6030 Int b_offset = MC_(get_otrack_shadow_offset)(
6031 e->Iex.Get.offset,
6032 sizeofIRType(e->Iex.Get.ty)
6033 );
6034 tl_assert(b_offset >= -1
6035 && b_offset <= mce->layout->total_sizeB -4);
6036 if (b_offset >= 0) {
6037 /* FIXME: this isn't an atom! */
6038 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6039 Ity_I32 );
6040 }
6041 return mkU32(0);
6042 }
6043 default:
6044 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6045 ppIRExpr(e);
6046 VG_(tool_panic)("memcheck:schemeE");
6047 }
6048 }
6049
6050
6051 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6052 {
6053 // This is a hacked version of do_shadow_Dirty
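   // In outline: compute a single 32-bit B-value which is the maxU32 of
   // the origins of everything the helper might observe (guard, unmasked
   // args, guest state read, memory read), then write that value to the
   // B-shadows of everything it might modify (the destination temporary,
   // guest state written, memory written).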
6054 Int i, k, n, toDo, gSz, gOff;
6055 IRAtom *here, *curr;
6056 IRTemp dst;
6057
6058 /* First check the guard. */
6059 curr = schemeE( mce, d->guard );
6060
6061 /* Now round up all inputs and maxU32 over them. */
6062
6063 /* Inputs: unmasked args
6064 Note: arguments are evaluated REGARDLESS of the guard expression */
6065 for (i = 0; d->args[i]; i++) {
6066 if (d->cee->mcx_mask & (1<<i)) {
6067 /* ignore this arg */
6068 } else {
6069 here = schemeE( mce, d->args[i] );
6070 curr = gen_maxU32( mce, curr, here );
6071 }
6072 }
6073
6074 /* Inputs: guest state that we read. */
6075 for (i = 0; i < d->nFxState; i++) {
6076 tl_assert(d->fxState[i].fx != Ifx_None);
6077 if (d->fxState[i].fx == Ifx_Write)
6078 continue;
6079
6080 /* Enumerate the described state segments */
6081 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6082 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6083 gSz = d->fxState[i].size;
6084
6085 /* Ignore any sections marked as 'always defined'. */
6086 if (isAlwaysDefd(mce, gOff, gSz)) {
6087 if (0)
6088 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6089 gOff, gSz);
6090 continue;
6091 }
6092
6093 /* This state element is read or modified. So we need to
6094 consider it. If larger than 4 bytes, deal with it in
6095 4-byte chunks. */
6096 while (True) {
6097 Int b_offset;
6098 tl_assert(gSz >= 0);
6099 if (gSz == 0) break;
6100 n = gSz <= 4 ? gSz : 4;
6101 /* update 'curr' with maxU32 of the state slice
6102 gOff .. gOff+n-1 */
6103 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6104 if (b_offset != -1) {
6105 /* Observe the guard expression. If it is false use 0, i.e.
6106 nothing is known about the origin */
6107 IRAtom *cond, *iffalse, *iftrue;
6108
6109 cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
6110 iffalse = mkU32(0);
6111 iftrue = assignNew( 'B', mce, Ity_I32,
6112 IRExpr_Get(b_offset
6113 + 2*mce->layout->total_sizeB,
6114 Ity_I32));
6115 here = assignNew( 'B', mce, Ity_I32,
6116 IRExpr_Mux0X(cond, iffalse, iftrue));
6117 curr = gen_maxU32( mce, curr, here );
6118 }
6119 gSz -= n;
6120 gOff += n;
6121 }
6122 }
6123 }
6124
6125 /* Inputs: memory */
6126
6127 if (d->mFx != Ifx_None) {
6128 /* Because we may do multiple shadow loads/stores from the same
6129 base address, it's best to do a single test of its
6130 definedness right now. Post-instrumentation optimisation
6131 should remove all but this test. */
6132 tl_assert(d->mAddr);
6133 here = schemeE( mce, d->mAddr );
6134 curr = gen_maxU32( mce, curr, here );
6135 }
6136
6137 /* Deal with memory inputs (reads or modifies) */
6138 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6139 toDo = d->mSize;
6140 /* chew off 32-bit chunks. We don't care about the endianness
6141 since it's all going to be condensed down to a single bit,
6142 but nevertheless choose an endianness which is hopefully
6143 native to the platform. */
6144 while (toDo >= 4) {
6145 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6146 d->guard );
6147 curr = gen_maxU32( mce, curr, here );
6148 toDo -= 4;
6149 }
6150 /* handle possible 16-bit excess */
6151 while (toDo >= 2) {
6152 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6153 d->guard );
6154 curr = gen_maxU32( mce, curr, here );
6155 toDo -= 2;
6156 }
6157 /* chew off the remaining 8-bit chunk, if any */
6158 if (toDo == 1) {
6159 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6160 d->guard );
6161 curr = gen_maxU32( mce, curr, here );
6162 toDo -= 1;
6163 }
6164 tl_assert(toDo == 0);
6165 }
6166
6167 /* Whew! So curr is a 32-bit B-value which should give an origin
6168 of some use if any of the inputs to the helper are undefined.
6169 Now we need to re-distribute the results to all destinations. */
6170
6171 /* Outputs: the destination temporary, if there is one. */
6172 if (d->tmp != IRTemp_INVALID) {
6173 dst = findShadowTmpB(mce, d->tmp);
6174       assign( 'B', mce, dst, curr );
6175 }
6176
6177 /* Outputs: guest state that we write or modify. */
6178 for (i = 0; i < d->nFxState; i++) {
6179 tl_assert(d->fxState[i].fx != Ifx_None);
6180 if (d->fxState[i].fx == Ifx_Read)
6181 continue;
6182
6183 /* Enumerate the described state segments */
6184 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6185 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6186 gSz = d->fxState[i].size;
6187
6188 /* Ignore any sections marked as 'always defined'. */
6189 if (isAlwaysDefd(mce, gOff, gSz))
6190 continue;
6191
6192 /* This state element is written or modified. So we need to
6193 consider it. If larger than 4 bytes, deal with it in
6194 4-byte chunks. */
6195 while (True) {
6196 Int b_offset;
6197 tl_assert(gSz >= 0);
6198 if (gSz == 0) break;
6199 n = gSz <= 4 ? gSz : 4;
6200 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6201 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6202 if (b_offset != -1) {
6203 if (d->guard) {
6204 /* If the guard expression evaluates to false we simply Put
6205 the value that is already stored in the guest state slot */
6206 IRAtom *cond, *iffalse;
6207
6208 cond = assignNew('B', mce, Ity_I8,
6209 unop(Iop_1Uto8, d->guard));
6210 iffalse = assignNew('B', mce, Ity_I32,
6211 IRExpr_Get(b_offset +
6212 2*mce->layout->total_sizeB,
6213 Ity_I32));
6214 curr = assignNew('V', mce, Ity_I32,
6215 IRExpr_Mux0X(cond, iffalse, curr));
6216 }
6217 stmt( 'B', mce, IRStmt_Put(b_offset
6218 + 2*mce->layout->total_sizeB,
6219 curr ));
6220 }
6221 gSz -= n;
6222 gOff += n;
6223 }
6224 }
6225 }
6226
6227 /* Outputs: memory that we write or modify. Same comments about
6228 endianness as above apply. */
6229 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
6230 toDo = d->mSize;
6231 /* chew off 32-bit chunks */
6232 while (toDo >= 4) {
6233 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
6234 d->guard );
6235 toDo -= 4;
6236 }
6237 /* handle possible 16-bit excess */
6238 while (toDo >= 2) {
6239 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6240 d->guard );
6241 toDo -= 2;
6242 }
6243 /* chew off the remaining 8-bit chunk, if any */
6244 if (toDo == 1) {
6245 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
6246 d->guard );
6247 toDo -= 1;
6248 }
6249 tl_assert(toDo == 0);
6250 }
6251 }
6252
6253
6254 static void do_origins_Store ( MCEnv* mce,
6255 IREndness stEnd,
6256 IRExpr* stAddr,
6257 IRExpr* stData )
6258 {
6259 Int dszB;
6260 IRAtom* dataB;
6261 /* assert that the B value for the address is already available
6262 (somewhere), since the call to schemeE will want to see it.
6263 XXXX how does this actually ensure that?? */
6264 tl_assert(isIRAtom(stAddr));
6265 tl_assert(isIRAtom(stData));
6266 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6267 dataB = schemeE( mce, stData );
6268 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6269 NULL/*guard*/ );
6270 }
6271
6272
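/* Generate B- (origin-) shadow statements for 'st', paralleling the V-bit
   instrumentation in MC_(instrument)'s main switch.  Only called when
   MC_(clo_mc_level) == 3, i.e. --track-origins=yes. */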
6273 static void schemeS ( MCEnv* mce, IRStmt* st )
6274 {
6275 tl_assert(MC_(clo_mc_level) == 3);
6276
6277 switch (st->tag) {
6278
6279 case Ist_AbiHint:
6280 /* The value-check instrumenter handles this - by arranging
6281 to pass the address of the next instruction to
6282 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
6283 happen for origin tracking w.r.t. AbiHints. So there is
6284 nothing to do here. */
6285 break;
6286
6287 case Ist_PutI: {
6288 IRPutI *puti = st->Ist.PutI.details;
6289 IRRegArray* descr_b;
6290 IRAtom *t1, *t2, *t3, *t4;
6291 IRRegArray* descr = puti->descr;
6292 IRType equivIntTy
6293 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6294 /* If this array is unshadowable for whatever reason,
6295 generate no code. */
6296 if (equivIntTy == Ity_INVALID)
6297 break;
6298 tl_assert(sizeofIRType(equivIntTy) >= 4);
6299 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6300 descr_b
6301 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6302 equivIntTy, descr->nElems );
6303 /* Compute a value to Put - the conjoinment of the origin for
6304 the data to be Put-ted (obviously) and of the index value
6305 (not so obviously). */
6306 t1 = schemeE( mce, puti->data );
6307 t2 = schemeE( mce, puti->ix );
6308 t3 = gen_maxU32( mce, t1, t2 );
6309 t4 = zWidenFrom32( mce, equivIntTy, t3 );
6310 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
6311 puti->bias, t4) ));
6312 break;
6313 }
6314
6315 case Ist_Dirty:
6316 do_origins_Dirty( mce, st->Ist.Dirty.details );
6317 break;
6318
6319 case Ist_Store:
6320 do_origins_Store( mce, st->Ist.Store.end,
6321 st->Ist.Store.addr,
6322 st->Ist.Store.data );
6323 break;
6324
6325 case Ist_LLSC: {
6326 /* In short: treat a load-linked like a normal load followed
6327           by an assignment of the loaded (shadow) data to the result
6328 temporary. Treat a store-conditional like a normal store,
6329 and mark the result temporary as defined. */
6330 if (st->Ist.LLSC.storedata == NULL) {
6331 /* Load Linked */
6332 IRType resTy
6333 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
6334 IRExpr* vanillaLoad
6335 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
6336 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6337 || resTy == Ity_I16 || resTy == Ity_I8);
6338 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6339 schemeE(mce, vanillaLoad));
6340 } else {
6341 /* Store conditional */
6342 do_origins_Store( mce, st->Ist.LLSC.end,
6343 st->Ist.LLSC.addr,
6344 st->Ist.LLSC.storedata );
6345 /* For the rationale behind this, see comments at the
6346 place where the V-shadow for .result is constructed, in
6347 do_shadow_LLSC. In short, we regard .result as
6348 always-defined. */
6349 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6350 mkU32(0) );
6351 }
6352 break;
6353 }
6354
6355 case Ist_Put: {
6356 Int b_offset
6357 = MC_(get_otrack_shadow_offset)(
6358 st->Ist.Put.offset,
6359 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
6360 );
6361 if (b_offset >= 0) {
6362 /* FIXME: this isn't an atom! */
6363 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
6364 schemeE( mce, st->Ist.Put.data )) );
6365 }
6366 break;
6367 }
6368
6369 case Ist_WrTmp:
6370 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
6371 schemeE(mce, st->Ist.WrTmp.data) );
6372 break;
6373
6374 case Ist_MBE:
6375 case Ist_NoOp:
6376 case Ist_Exit:
6377 case Ist_IMark:
6378 break;
6379
6380 default:
6381 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
6382 ppIRStmt(st);
6383 VG_(tool_panic)("memcheck:schemeS");
6384 }
6385 }
6386
6387
6388 /*--------------------------------------------------------------------*/
6389 /*--- end mc_translate.c ---*/
6390 /*--------------------------------------------------------------------*/
6391