1
2 /*---------------------------------------------------------------*/
3 /*--- begin test_main.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2011 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <assert.h>
39 #include <string.h>
40
41 #include "libvex_basictypes.h"
42 #include "libvex.h"
43
44 #include "test_main.h"
45
46
47 /*---------------------------------------------------------------*/
48 /*--- Test ---*/
49 /*---------------------------------------------------------------*/
50
51
52 __attribute__ ((noreturn))
53 static
54 void failure_exit ( void )
55 {
56 fprintf(stdout, "VEX did failure_exit. Bye.\n");
57 exit(1);
58 }
59
60 static
61 void log_bytes ( HChar* bytes, Int nbytes )
62 {
63 fwrite ( bytes, 1, nbytes, stdout );
64 }
65
66 #define N_LINEBUF 10000
67 static HChar linebuf[N_LINEBUF];
68
69 #define N_ORIGBUF 10000
70 #define N_TRANSBUF 5000
71
72 static UChar origbuf[N_ORIGBUF];
73 static UChar transbuf[N_TRANSBUF];
74
75 static Bool verbose = True;
76
77 /* Forwards */
78 #if 1 /* UNUSED */
79 //static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
80 static
81 IRSB* mc_instrument ( void* closureV,
82 IRSB* bb_in, VexGuestLayout* layout,
83 VexGuestExtents* vge,
84 IRType gWordTy, IRType hWordTy );
85 #endif
86
87 static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
88 return False;
89 }
90 static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
91 return 0;
92 }
93
94 int main ( int argc, char** argv )
95 {
96 FILE* f;
97 Int i;
98 UInt u, sum;
99 Addr32 orig_addr;
100 Int bb_number, n_bbs_done = 0;
101 Int orig_nbytes, trans_used;
102 VexTranslateResult tres;
103 VexControl vcon;
104 VexGuestExtents vge;
105 VexArchInfo vai_x86, vai_amd64, vai_ppc32;
106 VexAbiInfo vbi;
107 VexTranslateArgs vta;
108
109 if (argc != 2) {
110 fprintf(stderr, "usage: vex file.org\n");
111 exit(1);
112 }
113 f = fopen(argv[1], "r");
114 if (!f) {
115 fprintf(stderr, "can't open `%s'\n", argv[1]);
116 exit(1);
117 }
118
119 /* Run with default params. However, we can't allow bb chasing
120 since that causes the front end to get segfaults when it tries
121 to read code outside the initial BB we hand it. So when calling
122 LibVEX_Translate, send in a chase-into predicate that always
123 returns False. */
124 LibVEX_default_VexControl ( &vcon );
125 vcon.iropt_level = 2;
126 vcon.guest_max_insns = 50;
127
128 LibVEX_Init ( &failure_exit, &log_bytes,
129 1, /* debug_paranoia */
130 TEST_VSUPPORT, /* valgrind support */
131 &vcon );
132
133
134 while (!feof(f)) {
135
136 __attribute__((unused))
137 char* unused1 = fgets(linebuf, N_LINEBUF,f);
138 if (linebuf[0] == 0) continue;
139 if (linebuf[0] != '.') continue;
140
141 if (n_bbs_done == TEST_N_BBS) break;
142 n_bbs_done++;
143
144 /* first line is: . bb-number bb-addr n-bytes */
145 assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
146 & bb_number,
147 & orig_addr, & orig_nbytes ));
148 assert(orig_nbytes >= 1);
149 assert(!feof(f));
150 __attribute__((unused))
151 char* unused2 = fgets(linebuf, N_LINEBUF,f);
152 assert(linebuf[0] == '.');
153
154 /* second line is: . byte byte byte etc */
155 if (verbose)
156 printf("============ Basic Block %d, Done %d, "
157 "Start %x, nbytes %2d ============",
158 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
159
160 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
161 for (i = 0; i < orig_nbytes; i++) {
162 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
163 origbuf[i] = (UChar)u;
164 }
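/* For illustration (hypothetical data, not from the original file): a
   single input record consists of two lines, e.g.

      . 1 8048354 5
      . 55 89 e5 5d c3

   The first line gives bb number 1, guest address 0x8048354 and a
   length of 5 bytes; the second line gives the instruction bytes
   (here: push %ebp; mov %esp,%ebp; pop %ebp; ret), one two-digit hex
   value every three characters starting at offset 2, exactly as the
   sscanf calls above expect. */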
165
166 /* FIXME: put sensible values into the .hwcaps fields */
167 LibVEX_default_VexArchInfo(&vai_x86);
168 vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
169 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
170
171 LibVEX_default_VexArchInfo(&vai_amd64);
172 vai_amd64.hwcaps = 0;
173
174 LibVEX_default_VexArchInfo(&vai_ppc32);
175 vai_ppc32.hwcaps = 0;
176 vai_ppc32.ppc_cache_line_szB = 128;
177
178 LibVEX_default_VexAbiInfo(&vbi);
179
180 /* ----- Set up args for LibVEX_Translate ----- */
181 #if 0 /* ppc32 -> ppc32 */
182 vta.arch_guest = VexArchPPC32;
183 vta.archinfo_guest = vai_ppc32;
184 vta.arch_host = VexArchPPC32;
185 vta.archinfo_host = vai_ppc32;
186 #endif
187 #if 0 /* amd64 -> amd64 */
188 vta.arch_guest = VexArchAMD64;
189 vta.archinfo_guest = vai_amd64;
190 vta.arch_host = VexArchAMD64;
191 vta.archinfo_host = vai_amd64;
192 #endif
193 #if 1 /* x86 -> x86 */
194 vta.arch_guest = VexArchX86;
195 vta.archinfo_guest = vai_x86;
196 vta.arch_host = VexArchX86;
197 vta.archinfo_host = vai_x86;
198 #endif
199 vta.abiinfo_both = vbi;
200 vta.guest_bytes = origbuf;
201 vta.guest_bytes_addr = (Addr64)orig_addr;
202 vta.callback_opaque = NULL;
203 vta.chase_into_ok = chase_into_not_ok;
204 vta.guest_extents = &vge;
205 vta.host_bytes = transbuf;
206 vta.host_bytes_size = N_TRANSBUF;
207 vta.host_bytes_used = &trans_used;
208 #if 0 /* no instrumentation */
209 vta.instrument1 = NULL;
210 vta.instrument2 = NULL;
211 #endif
212 #if 0 /* addrcheck */
213 vta.instrument1 = ac_instrument;
214 vta.instrument2 = NULL;
215 #endif
216 #if 1 /* memcheck */
217 vta.instrument1 = mc_instrument;
218 vta.instrument2 = NULL;
219 #endif
220 vta.needs_self_check = needs_self_check;
221 vta.preamble_function = NULL;
222 vta.traceflags = TEST_FLAGS;
223 #if 1 /* x86, amd64 hosts */
224 vta.dispatch_unassisted = (void*)0x12345678;
225 vta.dispatch_assisted = (void*)0x12345678;
226 #else /* ppc32, ppc64 hosts */
227 vta.dispatch = NULL;
228 #endif
229
230 vta.finaltidy = NULL;
231
232 for (i = 0; i < TEST_N_ITERS; i++)
233 tres = LibVEX_Translate ( &vta );
234
235 if (tres.status != VexTransOK)
236 printf("\ntres = %d\n", (Int)tres.status);
237 assert(tres.status == VexTransOK);
238 assert(tres.n_sc_extents == 0);
239 assert(vge.n_used == 1);
240 assert((UInt)(vge.len[0]) == orig_nbytes);
241
242 sum = 0;
243 for (i = 0; i < trans_used; i++)
244 sum += (UInt)transbuf[i];
245 printf ( " %6.2f ... %u\n",
246 (double)trans_used / (double)vge.len[0], sum );
247 }
248
249 fclose(f);
250 printf("\n");
251 LibVEX_ShowAllocStats();
252
253 return 0;
254 }
255
256 //////////////////////////////////////////////////////////////////////
257 //////////////////////////////////////////////////////////////////////
258 //////////////////////////////////////////////////////////////////////
259 //////////////////////////////////////////////////////////////////////
260 //////////////////////////////////////////////////////////////////////
261 //////////////////////////////////////////////////////////////////////
262 //////////////////////////////////////////////////////////////////////
263 //////////////////////////////////////////////////////////////////////
264
265 #if 0 /* UNUSED */
266
267 static
268 __attribute((noreturn))
269 void panic ( HChar* s )
270 {
271 printf("\npanic: %s\n", s);
272 failure_exit();
273 }
274
275 static
276 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
277 {
278 /* Use this rather than eg. -1 because it's a UInt. */
279 #define INVALID_DATA_SIZE 999999
280
281 Int i;
282 Int sz;
283 IRCallee* helper;
284 IRStmt* st;
285 IRExpr* data;
286 IRExpr* addr;
287 Bool needSz;
288
289 /* Set up BB */
290 IRSB* bb = emptyIRSB();
291 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
292 bb->next = dopyIRExpr(bb_in->next);
293 bb->jumpkind = bb_in->jumpkind;
294
295 /* No loads to consider in ->next. */
296 assert(isIRAtom(bb_in->next));
297
298 for (i = 0; i < bb_in->stmts_used; i++) {
299 st = bb_in->stmts[i];
300 if (!st) continue;
301
302 switch (st->tag) {
303
304 case Ist_Tmp:
305 data = st->Ist.Tmp.data;
306 if (data->tag == Iex_LDle) {
307 addr = data->Iex.LDle.addr;
308 sz = sizeofIRType(data->Iex.LDle.ty);
309 needSz = False;
310 switch (sz) {
311 case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
312 (void*)0x12345601); break;
313 case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
314 (void*)0x12345602); break;
315 case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
316 (void*)0x12345603); break;
317 default: helper = mkIRCallee(0, "ac_helperc_LOADN",
318 (void*)0x12345604);
319 needSz = True; break;
320 }
321 if (needSz) {
322 addStmtToIRSB(
323 bb,
324 IRStmt_Dirty(
325 unsafeIRDirty_0_N( helper->regparms,
326 helper->name, helper->addr,
327 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
328 ));
329 } else {
330 addStmtToIRSB(
331 bb,
332 IRStmt_Dirty(
333 unsafeIRDirty_0_N( helper->regparms,
334 helper->name, helper->addr,
335 mkIRExprVec_1(addr) )
336 ));
337 }
338 }
339 break;
340
341 case Ist_STle:
342 data = st->Ist.STle.data;
343 addr = st->Ist.STle.addr;
344 assert(isIRAtom(data));
345 assert(isIRAtom(addr));
346 sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
347 needSz = False;
348 switch (sz) {
349 case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
350 (void*)0x12345605); break;
351 case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
352 (void*)0x12345606); break;
353 case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
354 (void*)0x12345607); break;
355 default: helper = mkIRCallee(0, "ac_helperc_STOREN",
356 (void*)0x12345608);
357 needSz = True; break;
358 }
359 if (needSz) {
360 addStmtToIRSB(
361 bb,
362 IRStmt_Dirty(
363 unsafeIRDirty_0_N( helper->regparms,
364 helper->name, helper->addr,
365 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
366 ));
367 } else {
368 addStmtToIRSB(
369 bb,
370 IRStmt_Dirty(
371 unsafeIRDirty_0_N( helper->regparms,
372 helper->name, helper->addr,
373 mkIRExprVec_1(addr) )
374 ));
375 }
376 break;
377
378 case Ist_Put:
379 assert(isIRAtom(st->Ist.Put.data));
380 break;
381
382 case Ist_PutI:
383 assert(isIRAtom(st->Ist.PutI.ix));
384 assert(isIRAtom(st->Ist.PutI.data));
385 break;
386
387 case Ist_Exit:
388 assert(isIRAtom(st->Ist.Exit.guard));
389 break;
390
391 case Ist_Dirty:
392 /* If the call doesn't interact with memory, we ain't
393 interested. */
394 if (st->Ist.Dirty.details->mFx == Ifx_None)
395 break;
396 goto unhandled;
397
398 default:
399 unhandled:
400 printf("\n");
401 ppIRStmt(st);
402 printf("\n");
403 panic("addrcheck: unhandled IRStmt");
404 }
405
406 addStmtToIRSB( bb, dopyIRStmt(st));
407 }
408
409 return bb;
410 }
411 #endif /* UNUSED */
412
413 //////////////////////////////////////////////////////////////////////
414 //////////////////////////////////////////////////////////////////////
415 //////////////////////////////////////////////////////////////////////
416 //////////////////////////////////////////////////////////////////////
417 //////////////////////////////////////////////////////////////////////
418 //////////////////////////////////////////////////////////////////////
419 //////////////////////////////////////////////////////////////////////
420 //////////////////////////////////////////////////////////////////////
421
422 #if 1 /* UNUSED */
423
424 static
425 __attribute((noreturn))
426 void panic ( HChar* s )
427 {
428 printf("\npanic: %s\n", s);
429 failure_exit();
430 }
431
432 #define tl_assert(xxx) assert(xxx)
433 #define VG_(xxxx) xxxx
434 #define tool_panic(zzz) panic(zzz)
435 #define MC_(zzzz) MC_##zzzz
436 #define TL_(zzzz) SK_##zzzz
437
438
439 static void MC_helperc_complain_undef ( void );
440 static void MC_helperc_LOADV8 ( void );
441 static void MC_helperc_LOADV4 ( void );
442 static void MC_helperc_LOADV2 ( void );
443 static void MC_helperc_LOADV1 ( void );
444 static void MC_helperc_STOREV8( void );
445 static void MC_helperc_STOREV4( void );
446 static void MC_helperc_STOREV2( void );
447 static void MC_helperc_STOREV1( void );
448 static void MC_helperc_value_check0_fail( void );
449 static void MC_helperc_value_check1_fail( void );
450 static void MC_helperc_value_check4_fail( void );
451
452 static void MC_helperc_complain_undef ( void ) { }
453 static void MC_helperc_LOADV8 ( void ) { }
454 static void MC_helperc_LOADV4 ( void ) { }
455 static void MC_helperc_LOADV2 ( void ) { }
456 static void MC_helperc_LOADV1 ( void ) { }
457 static void MC_helperc_STOREV8( void ) { }
458 static void MC_helperc_STOREV4( void ) { }
459 static void MC_helperc_STOREV2( void ) { }
460 static void MC_helperc_STOREV1( void ) { }
461 static void MC_helperc_value_check0_fail( void ) { }
462 static void MC_helperc_value_check1_fail( void ) { }
463 static void MC_helperc_value_check4_fail( void ) { }
464
465
466 /*--------------------------------------------------------------------*/
467 /*--- Instrument IR to perform memory checking operations. ---*/
468 /*--- mc_translate.c ---*/
469 /*--------------------------------------------------------------------*/
470
471 /*
472 This file is part of MemCheck, a heavyweight Valgrind tool for
473 detecting memory errors.
474
475 Copyright (C) 2000-2011 Julian Seward
476 jseward@acm.org
477
478 This program is free software; you can redistribute it and/or
479 modify it under the terms of the GNU General Public License as
480 published by the Free Software Foundation; either version 2 of the
481 License, or (at your option) any later version.
482
483 This program is distributed in the hope that it will be useful, but
484 WITHOUT ANY WARRANTY; without even the implied warranty of
485 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
486 General Public License for more details.
487
488 You should have received a copy of the GNU General Public License
489 along with this program; if not, write to the Free Software
490 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
491 02111-1307, USA.
492
493 The GNU General Public License is contained in the file COPYING.
494 */
495
496 //#include "mc_include.h"
497
498
499 /*------------------------------------------------------------*/
500 /*--- Forward decls ---*/
501 /*------------------------------------------------------------*/
502
503 struct _MCEnv;
504
505 static IRType shadowType ( IRType ty );
506 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
507
508
509 /*------------------------------------------------------------*/
510 /*--- Memcheck running state, and tmp management. ---*/
511 /*------------------------------------------------------------*/
512
513 /* Carries around state during memcheck instrumentation. */
514 typedef
515 struct _MCEnv {
516 /* MODIFIED: the bb being constructed. IRStmts are added. */
517 IRSB* bb;
518
519 /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
520 original temps to their current shadow temp.
521 Initially all entries are IRTemp_INVALID. Entries are added
522 lazily since many original temps are not used due to
523 optimisation prior to instrumentation. Note that floating
524 point original tmps are shadowed by integer tmps of the same
525 size, and Bit-typed original tmps are shadowed by the type
526 Ity_I8. See comment below. */
527 IRTemp* tmpMap;
528 Int n_originalTmps; /* for range checking */
529
530 /* READONLY: the guest layout. This indicates which parts of
531 the guest state should be regarded as 'always defined'. */
532 VexGuestLayout* layout;
533 /* READONLY: the host word type. Needed for constructing
534 arguments of type 'HWord' to be passed to helper functions.
535 Ity_I32 or Ity_I64 only. */
536 IRType hWordTy;
537 }
538 MCEnv;
539
540 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
541 demand), as they are encountered. This is for two reasons.
542
543 (1) (less important reason): Many original tmps are unused due to
544 initial IR optimisation, and we do not want to waste space in tables
545 tracking them.
546
547 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
548 table indexed [0 .. n_types-1], which gives the current shadow for
549 each original tmp, or INVALID_IRTEMP if none is so far assigned.
550 It is necessary to support making multiple assignments to a shadow
551 -- specifically, after testing a shadow for definedness, it needs
552 to be made defined. But IR's SSA property disallows this.
553
554 (2) (more important reason): Therefore, when a shadow needs to get
555 a new value, a new temporary is created, the value is assigned to
556 that, and the tmpMap is updated to reflect the new binding.
557
558 A corollary is that if the tmpMap maps a given tmp to
559 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
560 there's a read-before-write error in the original tmps. The IR
561 sanity checker should catch all such anomalies, however.
562 */
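/* Illustrative example (temp numbers are hypothetical): if the
   original bb has temps t0..t2, tmpMap starts as
   [INVALID, INVALID, INVALID].  The first findShadowTmp(mce, 1)
   allocates a fresh shadow, say t7, giving [INVALID, t7, INVALID].
   If t1's shadow later has to be forced to 'defined' (see
   complainIfUndefined below), newShadowTmp(mce, 1) allocates another
   fresh temp, say t9, and the map becomes [INVALID, t9, INVALID];
   the old shadow t7 is never assigned to again, so SSA form is
   preserved. */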
563
564 /* Find the tmp currently shadowing the given original tmp. If none
565 so far exists, allocate one. */
566 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
567 {
568 tl_assert(orig < mce->n_originalTmps);
569 if (mce->tmpMap[orig] == IRTemp_INVALID) {
570 mce->tmpMap[orig]
571 = newIRTemp(mce->bb->tyenv,
572 shadowType(mce->bb->tyenv->types[orig]));
573 }
574 return mce->tmpMap[orig];
575 }
576
577 /* Allocate a new shadow for the given original tmp. This means any
578 previous shadow is abandoned. This is needed because it is
579 necessary to give a new value to a shadow once it has been tested
580 for undefinedness, but unfortunately IR's SSA property disallows
581 this. Instead we must abandon the old shadow, allocate a new one
582 and use that instead. */
583 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
584 {
585 tl_assert(orig < mce->n_originalTmps);
586 mce->tmpMap[orig]
587 = newIRTemp(mce->bb->tyenv,
588 shadowType(mce->bb->tyenv->types[orig]));
589 }
590
591
592 /*------------------------------------------------------------*/
593 /*--- IRAtoms -- a subset of IRExprs ---*/
594 /*------------------------------------------------------------*/
595
596 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
597 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
598 input, most of this code deals in atoms. Usefully, a value atom
599 always has a V-value which is also an atom: constants are shadowed
600 by constants, and temps are shadowed by the corresponding shadow
601 temporary. */
602
603 typedef IRExpr IRAtom;
604
605 /* (used for sanity checks only): is this an atom which looks
606 like it's from original code? */
607 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
608 {
609 if (a1->tag == Iex_Const)
610 return True;
611 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
612 return True;
613 return False;
614 }
615
616 /* (used for sanity checks only): is this an atom which looks
617 like it's from shadow code? */
618 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
619 {
620 if (a1->tag == Iex_Const)
621 return True;
622 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
623 return True;
624 return False;
625 }
626
627 /* (used for sanity checks only): check that both args are atoms and
628 are identically-kinded. */
629 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
630 {
631 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
632 return True;
633 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
634 return True;
635 return False;
636 }
637
638
639 /*------------------------------------------------------------*/
640 /*--- Type management ---*/
641 /*------------------------------------------------------------*/
642
643 /* Shadow state is always accessed using integer types. This returns
644 an integer type with the same size (as per sizeofIRType) as the
645 given type. The only valid shadow types are Bit, I8, I16, I32,
646 I64, V128. */
647
648 static IRType shadowType ( IRType ty )
649 {
650 switch (ty) {
651 case Ity_I1:
652 case Ity_I8:
653 case Ity_I16:
654 case Ity_I32:
655 case Ity_I64: return ty;
656 case Ity_F32: return Ity_I32;
657 case Ity_F64: return Ity_I64;
658 case Ity_V128: return Ity_V128;
659 default: ppIRType(ty);
660 VG_(tool_panic)("memcheck:shadowType");
661 }
662 }
663
664 /* Produce a 'defined' value of the given shadow type. Should only be
665 supplied shadow types (Bit/I8/I16/I32/UI64). */
666 static IRExpr* definedOfType ( IRType ty ) {
667 switch (ty) {
668 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
669 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
670 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
671 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
672 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
673 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
674 default: VG_(tool_panic)("memcheck:definedOfType");
675 }
676 }
677
678
679 /*------------------------------------------------------------*/
680 /*--- Constructing IR fragments ---*/
681 /*------------------------------------------------------------*/
682
683 /* assign value to tmp */
684 #define assign(_bb,_tmp,_expr) \
685 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
686
687 /* add stmt to a bb */
688 #define stmt(_bb,_stmt) \
689 addStmtToIRSB((_bb), (_stmt))
690
691 /* build various kinds of expressions */
692 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
693 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
694 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
695 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
696 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
697 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
698 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
699 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
700
701 /* bind the given expression to a new temporary, and return the
702 temporary. This effectively converts an arbitrary expression into
703 an atom. */
704 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
705 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
706 assign(mce->bb, t, e);
707 return mkexpr(t);
708 }
709
710
711 /*------------------------------------------------------------*/
712 /*--- Constructing definedness primitive ops ---*/
713 /*------------------------------------------------------------*/
714
715 /* --------- Defined-if-either-defined --------- */
716
717 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
718 tl_assert(isShadowAtom(mce,a1));
719 tl_assert(isShadowAtom(mce,a2));
720 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
721 }
722
723 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
724 tl_assert(isShadowAtom(mce,a1));
725 tl_assert(isShadowAtom(mce,a2));
726 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
727 }
728
729 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
730 tl_assert(isShadowAtom(mce,a1));
731 tl_assert(isShadowAtom(mce,a2));
732 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
733 }
734
735 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
736 tl_assert(isShadowAtom(mce,a1));
737 tl_assert(isShadowAtom(mce,a2));
738 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
739 }
740
741 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
742 tl_assert(isShadowAtom(mce,a1));
743 tl_assert(isShadowAtom(mce,a2));
744 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
745 }
746
747 /* --------- Undefined-if-either-undefined --------- */
748
749 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
750 tl_assert(isShadowAtom(mce,a1));
751 tl_assert(isShadowAtom(mce,a2));
752 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
753 }
754
755 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
756 tl_assert(isShadowAtom(mce,a1));
757 tl_assert(isShadowAtom(mce,a2));
758 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
759 }
760
761 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
762 tl_assert(isShadowAtom(mce,a1));
763 tl_assert(isShadowAtom(mce,a2));
764 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
765 }
766
767 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
768 tl_assert(isShadowAtom(mce,a1));
769 tl_assert(isShadowAtom(mce,a2));
770 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
771 }
772
773 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
774 tl_assert(isShadowAtom(mce,a1));
775 tl_assert(isShadowAtom(mce,a2));
776 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
777 }
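/* Worked example for the 8-bit cases: with shadow values a1 = 0x0F
   (low nibble undefined) and a2 = 0xF0 (high nibble undefined),
   mkUifU8 yields 0x0F | 0xF0 = 0xFF (all bits undefined), whereas
   mkDifD8 yields 0x0F & 0xF0 = 0x00 (all bits defined).  UifU is the
   pessimistic merge, DifD the optimistic one. */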
778
779 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
780 switch (vty) {
781 case Ity_I8: return mkUifU8(mce, a1, a2);
782 case Ity_I16: return mkUifU16(mce, a1, a2);
783 case Ity_I32: return mkUifU32(mce, a1, a2);
784 case Ity_I64: return mkUifU64(mce, a1, a2);
785 case Ity_V128: return mkUifUV128(mce, a1, a2);
786 default:
787 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
788 VG_(tool_panic)("memcheck:mkUifU");
789 }
790 }
791
792 /* --------- The Left-family of operations. --------- */
793
794 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
795 tl_assert(isShadowAtom(mce,a1));
796 /* It's safe to duplicate a1 since it's only an atom */
797 return assignNew(mce, Ity_I8,
798 binop(Iop_Or8, a1,
799 assignNew(mce, Ity_I8,
800 /* unop(Iop_Neg8, a1)))); */
801 binop(Iop_Sub8, mkU8(0), a1) )));
802 }
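/* Worked example for the 8-bit case: if a1 = 0x04 (only bit 2
   undefined), then 0 - 0x04 = 0xFC and 0x04 | 0xFC = 0xFC, so every
   bit at or above the lowest undefined bit becomes undefined.  This
   'smearing left' conservatively models a carry out of an undefined
   bit corrupting all higher-order result bits. */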
803
804 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
805 tl_assert(isShadowAtom(mce,a1));
806 /* It's safe to duplicate a1 since it's only an atom */
807 return assignNew(mce, Ity_I16,
808 binop(Iop_Or16, a1,
809 assignNew(mce, Ity_I16,
810 /* unop(Iop_Neg16, a1)))); */
811 binop(Iop_Sub16, mkU16(0), a1) )));
812 }
813
814 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
815 tl_assert(isShadowAtom(mce,a1));
816 /* It's safe to duplicate a1 since it's only an atom */
817 return assignNew(mce, Ity_I32,
818 binop(Iop_Or32, a1,
819 assignNew(mce, Ity_I32,
820 /* unop(Iop_Neg32, a1)))); */
821 binop(Iop_Sub32, mkU32(0), a1) )));
822 }
823
824 /* --------- 'Improvement' functions for AND/OR. --------- */
825
826 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
827 defined (0); all other -> undefined (1).
828 */
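/* For example (8-bit case): data = 0x0F with vbits = 0x00 (data
   fully defined) gives 0x0F.  Only the high nibble, where data has
   defined 0 bits, is 0 in the result: an AND against a defined zero
   is defined no matter what the other operand's V bits are. */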
829 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
830 {
831 tl_assert(isOriginalAtom(mce, data));
832 tl_assert(isShadowAtom(mce, vbits));
833 tl_assert(sameKindedAtoms(data, vbits));
834 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
835 }
836
837 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
838 {
839 tl_assert(isOriginalAtom(mce, data));
840 tl_assert(isShadowAtom(mce, vbits));
841 tl_assert(sameKindedAtoms(data, vbits));
842 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
843 }
844
845 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
846 {
847 tl_assert(isOriginalAtom(mce, data));
848 tl_assert(isShadowAtom(mce, vbits));
849 tl_assert(sameKindedAtoms(data, vbits));
850 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
851 }
852
853 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
854 {
855 tl_assert(isOriginalAtom(mce, data));
856 tl_assert(isShadowAtom(mce, vbits));
857 tl_assert(sameKindedAtoms(data, vbits));
858 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
859 }
860
861 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
862 {
863 tl_assert(isOriginalAtom(mce, data));
864 tl_assert(isShadowAtom(mce, vbits));
865 tl_assert(sameKindedAtoms(data, vbits));
866 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
867 }
868
869 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
870 defined (0); all other -> undefined (1).
871 */
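/* For example (8-bit case): data = 0x0F with vbits = 0x00 gives
   ~0x0F | 0x00 = 0xF0.  Only the low nibble, where data has defined
   1 bits, is 0 in the result: an OR against a defined one is defined
   no matter what the other operand's V bits are. */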
872 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
873 {
874 tl_assert(isOriginalAtom(mce, data));
875 tl_assert(isShadowAtom(mce, vbits));
876 tl_assert(sameKindedAtoms(data, vbits));
877 return assignNew(
878 mce, Ity_I8,
879 binop(Iop_Or8,
880 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
881 vbits) );
882 }
883
884 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
885 {
886 tl_assert(isOriginalAtom(mce, data));
887 tl_assert(isShadowAtom(mce, vbits));
888 tl_assert(sameKindedAtoms(data, vbits));
889 return assignNew(
890 mce, Ity_I16,
891 binop(Iop_Or16,
892 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
893 vbits) );
894 }
895
896 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
897 {
898 tl_assert(isOriginalAtom(mce, data));
899 tl_assert(isShadowAtom(mce, vbits));
900 tl_assert(sameKindedAtoms(data, vbits));
901 return assignNew(
902 mce, Ity_I32,
903 binop(Iop_Or32,
904 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
905 vbits) );
906 }
907
908 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
909 {
910 tl_assert(isOriginalAtom(mce, data));
911 tl_assert(isShadowAtom(mce, vbits));
912 tl_assert(sameKindedAtoms(data, vbits));
913 return assignNew(
914 mce, Ity_I64,
915 binop(Iop_Or64,
916 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
917 vbits) );
918 }
919
920 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
921 {
922 tl_assert(isOriginalAtom(mce, data));
923 tl_assert(isShadowAtom(mce, vbits));
924 tl_assert(sameKindedAtoms(data, vbits));
925 return assignNew(
926 mce, Ity_V128,
927 binop(Iop_OrV128,
928 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
929 vbits) );
930 }
931
932 /* --------- Pessimising casts. --------- */
933
934 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
935 {
936 IRType ty;
937 IRAtom* tmp1;
938 /* Note, dst_ty is a shadow type, not an original type. */
939 /* First of all, collapse vbits down to a single bit. */
940 tl_assert(isShadowAtom(mce,vbits));
941 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
942 tmp1 = NULL;
943 switch (ty) {
944 case Ity_I1:
945 tmp1 = vbits;
946 break;
947 case Ity_I8:
948 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
949 break;
950 case Ity_I16:
951 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
952 break;
953 case Ity_I32:
954 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
955 break;
956 case Ity_I64:
957 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
958 break;
959 default:
960 VG_(tool_panic)("mkPCastTo(1)");
961 }
962 tl_assert(tmp1);
963 /* Now widen up to the dst type. */
964 switch (dst_ty) {
965 case Ity_I1:
966 return tmp1;
967 case Ity_I8:
968 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
969 case Ity_I16:
970 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
971 case Ity_I32:
972 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
973 case Ity_I64:
974 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
975 case Ity_V128:
976 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
977 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
978 return tmp1;
979 default:
980 ppIRType(dst_ty);
981 VG_(tool_panic)("mkPCastTo(2)");
982 }
983 }
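/* Worked example: mkPCastTo(mce, Ity_I16, vbits8) first computes
   CmpNE8(vbits8, 0).  If vbits8 has any bit set this is 1, and
   1Sto16 then gives 0xFFFF, i.e. the whole 16-bit result is treated
   as undefined; if vbits8 == 0 the result is 0x0000.  This is the
   'pessimising' part: a single undefined input bit poisons every
   output bit. */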
984
985
986 /*------------------------------------------------------------*/
987 /*--- Emit a test and complaint if something is undefined. ---*/
988 /*------------------------------------------------------------*/
989
990 /* Set the annotations on a dirty helper to indicate that the stack
991 pointer and instruction pointers might be read. This is the
992 behaviour of all 'emit-a-complaint' style functions we might
993 call. */
994
995 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
996 di->nFxState = 2;
997 di->fxState[0].fx = Ifx_Read;
998 di->fxState[0].offset = mce->layout->offset_SP;
999 di->fxState[0].size = mce->layout->sizeof_SP;
1000 di->fxState[1].fx = Ifx_Read;
1001 di->fxState[1].offset = mce->layout->offset_IP;
1002 di->fxState[1].size = mce->layout->sizeof_IP;
1003 }
1004
1005
1006 /* Check the supplied **original** atom for undefinedness, and emit a
1007 complaint if so. Once that happens, mark it as defined. This is
1008 possible because the atom is either a tmp or literal. If it's a
1009 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1010 be defined. In fact as mentioned above, we will have to allocate a
1011 new tmp to carry the new 'defined' shadow value, and update the
1012 original->tmp mapping accordingly; we cannot simply assign a new
1013 value to an existing shadow tmp as this breaks SSAness -- resulting
1014 in the post-instrumentation sanity checker spluttering in disapproval.
1015 */
1016 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1017 {
1018 IRAtom* vatom;
1019 IRType ty;
1020 Int sz;
1021 IRDirty* di;
1022 IRAtom* cond;
1023
1024 /* Since the original expression is atomic, there's no duplicated
1025 work generated by making multiple V-expressions for it. So we
1026 don't really care about the possibility that someone else may
1027 also create a V-interpretation for it. */
1028 tl_assert(isOriginalAtom(mce, atom));
1029 vatom = expr2vbits( mce, atom );
1030 tl_assert(isShadowAtom(mce, vatom));
1031 tl_assert(sameKindedAtoms(atom, vatom));
1032
1033 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1034
1035 /* sz is only used for constructing the error message */
1036 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1037
1038 cond = mkPCastTo( mce, Ity_I1, vatom );
1039 /* cond will be 0 if all defined, and 1 if any not defined. */
1040
1041 switch (sz) {
1042 case 0:
1043 di = unsafeIRDirty_0_N( 0/*regparms*/,
1044 "MC_(helperc_value_check0_fail)",
1045 &MC_(helperc_value_check0_fail),
1046 mkIRExprVec_0()
1047 );
1048 break;
1049 case 1:
1050 di = unsafeIRDirty_0_N( 0/*regparms*/,
1051 "MC_(helperc_value_check1_fail)",
1052 &MC_(helperc_value_check1_fail),
1053 mkIRExprVec_0()
1054 );
1055 break;
1056 case 4:
1057 di = unsafeIRDirty_0_N( 0/*regparms*/,
1058 "MC_(helperc_value_check4_fail)",
1059 &MC_(helperc_value_check4_fail),
1060 mkIRExprVec_0()
1061 );
1062 break;
1063 default:
1064 di = unsafeIRDirty_0_N( 1/*regparms*/,
1065 "MC_(helperc_complain_undef)",
1066 &MC_(helperc_complain_undef),
1067 mkIRExprVec_1( mkIRExpr_HWord( sz ))
1068 );
1069 break;
1070 }
1071 di->guard = cond;
1072 setHelperAnns( mce, di );
1073 stmt( mce->bb, IRStmt_Dirty(di));
1074
1075 /* Set the shadow tmp to be defined. First, update the
1076 orig->shadow tmp mapping to reflect the fact that this shadow is
1077 getting a new value. */
1078 tl_assert(isIRAtom(vatom));
1079 /* sameKindedAtoms ... */
1080 if (vatom->tag == Iex_RdTmp) {
1081 tl_assert(atom->tag == Iex_RdTmp);
1082 newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1083 assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1084 definedOfType(ty));
1085 }
1086 }
1087
1088
1089 /*------------------------------------------------------------*/
1090 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1091 /*------------------------------------------------------------*/
1092
1093 /* Examine the always-defined sections declared in layout to see if
1094 the (offset,size) section is within one. Note, it is an error to
1095 partially fall into such a region: (offset,size) should either be
1096 completely in such a region or completely not-in such a region.
1097 */
1098 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1099 {
1100 Int minoffD, maxoffD, i;
1101 Int minoff = offset;
1102 Int maxoff = minoff + size - 1;
1103 tl_assert((minoff & ~0xFFFF) == 0);
1104 tl_assert((maxoff & ~0xFFFF) == 0);
1105
1106 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1107 minoffD = mce->layout->alwaysDefd[i].offset;
1108 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1109 tl_assert((minoffD & ~0xFFFF) == 0);
1110 tl_assert((maxoffD & ~0xFFFF) == 0);
1111
1112 if (maxoff < minoffD || maxoffD < minoff)
1113 continue; /* no overlap */
1114 if (minoff >= minoffD && maxoff <= maxoffD)
1115 return True; /* completely contained in an always-defd section */
1116
1117 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1118 }
1119 return False; /* could not find any containing section */
1120 }
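/* For example (hypothetical layout): if layout->alwaysDefd holds the
   single entry { offset 64, size 8 }, then isAlwaysDefd(mce, 64, 8)
   and isAlwaysDefd(mce, 68, 4) return True, isAlwaysDefd(mce, 72, 4)
   returns False, and isAlwaysDefd(mce, 60, 8) panics, since that
   slice straddles the boundary of the always-defined section. */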
1121
1122
1123 /* Generate into bb suitable actions to shadow this Put. If the state
1124 slice is marked 'always defined', do nothing. Otherwise, write the
1125 supplied V bits to the shadow state. We can pass in either an
1126 original atom or a V-atom, but not both. In the former case the
1127 relevant V-bits are then generated from the original.
1128 */
1129 static
1130 void do_shadow_PUT ( MCEnv* mce, Int offset,
1131 IRAtom* atom, IRAtom* vatom )
1132 {
1133 IRType ty;
1134 if (atom) {
1135 tl_assert(!vatom);
1136 tl_assert(isOriginalAtom(mce, atom));
1137 vatom = expr2vbits( mce, atom );
1138 } else {
1139 tl_assert(vatom);
1140 tl_assert(isShadowAtom(mce, vatom));
1141 }
1142
1143 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1144 tl_assert(ty != Ity_I1);
1145 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1146 /* later: no ... */
1147 /* emit code to emit a complaint if any of the vbits are 1. */
1148 /* complainIfUndefined(mce, atom); */
1149 } else {
1150 /* Do a plain shadow Put. */
1151 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1152 }
1153 }
1154
1155
1156 /* Generate into bb suitable actions to shadow this PutI, which is
1157 passed in in pieces.
1158 */
1159 static
1160 void do_shadow_PUTI ( MCEnv* mce,
1161 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1162 {
1163 IRAtom* vatom;
1164 IRType ty, tyS;
1165 Int arrSize;
1166
1167 tl_assert(isOriginalAtom(mce,atom));
1168 vatom = expr2vbits( mce, atom );
1169 tl_assert(sameKindedAtoms(atom, vatom));
1170 ty = descr->elemTy;
1171 tyS = shadowType(ty);
1172 arrSize = descr->nElems * sizeofIRType(ty);
1173 tl_assert(ty != Ity_I1);
1174 tl_assert(isOriginalAtom(mce,ix));
1175 complainIfUndefined(mce,ix);
1176 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1177 /* later: no ... */
1178 /* emit code to emit a complaint if any of the vbits are 1. */
1179 /* complainIfUndefined(mce, atom); */
1180 } else {
1181 /* Do a cloned version of the Put that refers to the shadow
1182 area. */
1183 IRRegArray* new_descr
1184 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1185 tyS, descr->nElems);
1186 stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
1187 }
1188 }
1189
1190
1191 /* Return an expression which contains the V bits corresponding to the
1192 given GET (passed in in pieces).
1193 */
1194 static
1195 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1196 {
1197 IRType tyS = shadowType(ty);
1198 tl_assert(ty != Ity_I1);
1199 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1200 /* Always defined, return all zeroes of the relevant type */
1201 return definedOfType(tyS);
1202 } else {
1203 /* return a cloned version of the Get that refers to the shadow
1204 area. */
1205 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1206 }
1207 }
1208
1209
1210 /* Return an expression which contains the V bits corresponding to the
1211 given GETI (passed in in pieces).
1212 */
1213 static
1214 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1215 {
1216 IRType ty = descr->elemTy;
1217 IRType tyS = shadowType(ty);
1218 Int arrSize = descr->nElems * sizeofIRType(ty);
1219 tl_assert(ty != Ity_I1);
1220 tl_assert(isOriginalAtom(mce,ix));
1221 complainIfUndefined(mce,ix);
1222 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1223 /* Always defined, return all zeroes of the relevant type */
1224 return definedOfType(tyS);
1225 } else {
1226 /* return a cloned version of the Get that refers to the shadow
1227 area. */
1228 IRRegArray* new_descr
1229 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1230 tyS, descr->nElems);
1231 return IRExpr_GetI( new_descr, ix, bias );
1232 }
1233 }
1234
1235
1236 /*------------------------------------------------------------*/
1237 /*--- Generating approximations for unknown operations, ---*/
1238 /*--- using lazy-propagate semantics ---*/
1239 /*------------------------------------------------------------*/
1240
1241 /* Lazy propagation of undefinedness from two values, resulting in the
1242 specified shadow type.
1243 */
1244 static
1245 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1246 {
1247 /* force everything via 32-bit intermediaries. */
1248 IRAtom* at;
1249 tl_assert(isShadowAtom(mce,va1));
1250 tl_assert(isShadowAtom(mce,va2));
1251 at = mkPCastTo(mce, Ity_I32, va1);
1252 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1253 at = mkPCastTo(mce, finalVty, at);
1254 return at;
1255 }
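/* For example: the Iop_CmpF64 case below uses mkLazy2 with
   finalVty = Ity_I32, the two F64 operands having I64 shadows.  Each
   shadow is pcast down to an all-0s/all-1s I32 summary, the two
   summaries are UifU'd (OR'd) together, and the merge is pcast to the
   final type: the I32 result is wholly undefined if either operand
   contains any undefined bit, and wholly defined otherwise. */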
1256
1257
1258 /* Do the lazy propagation game from a null-terminated vector of
1259 atoms. This is presumably the arguments to a helper call, so the
1260 IRCallee info is also supplied in order that we can know which
1261 arguments should be ignored (via the .mcx_mask field).
1262 */
1263 static
1264 IRAtom* mkLazyN ( MCEnv* mce,
1265 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1266 {
1267 Int i;
1268 IRAtom* here;
1269 IRAtom* curr = definedOfType(Ity_I32);
1270 for (i = 0; exprvec[i]; i++) {
1271 tl_assert(i < 32);
1272 tl_assert(isOriginalAtom(mce, exprvec[i]));
1273 /* Only take notice of this arg if the callee's mc-exclusion
1274 mask does not say it is to be excluded. */
1275 if (cee->mcx_mask & (1<<i)) {
1276 /* the arg is to be excluded from definedness checking. Do
1277 nothing. */
1278 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1279 } else {
1280 /* calculate the arg's definedness, and pessimistically merge
1281 it in. */
1282 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1283 curr = mkUifU32(mce, here, curr);
1284 }
1285 }
1286 return mkPCastTo(mce, finalVtype, curr );
1287 }
1288
1289
1290 /*------------------------------------------------------------*/
1291 /*--- Generating expensive sequences for exact carry-chain ---*/
1292 /*--- propagation in add/sub and related operations. ---*/
1293 /*------------------------------------------------------------*/
1294
1295 static
1296 __attribute__((unused))
1297 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1298 IRAtom* aa, IRAtom* bb )
1299 {
1300 IRAtom *a_min, *b_min, *a_max, *b_max;
1301 IRType ty;
1302 IROp opAND, opOR, opXOR, opNOT, opADD;
1303
1304 tl_assert(isShadowAtom(mce,qaa));
1305 tl_assert(isShadowAtom(mce,qbb));
1306 tl_assert(isOriginalAtom(mce,aa));
1307 tl_assert(isOriginalAtom(mce,bb));
1308 tl_assert(sameKindedAtoms(qaa,aa));
1309 tl_assert(sameKindedAtoms(qbb,bb));
1310
1311 ty = Ity_I32;
1312 opAND = Iop_And32;
1313 opOR = Iop_Or32;
1314 opXOR = Iop_Xor32;
1315 opNOT = Iop_Not32;
1316 opADD = Iop_Add32;
1317
1318 // a_min = aa & ~qaa
1319 a_min = assignNew(mce,ty,
1320 binop(opAND, aa,
1321 assignNew(mce,ty, unop(opNOT, qaa))));
1322
1323 // b_min = bb & ~qbb
1324 b_min = assignNew(mce,ty,
1325 binop(opAND, bb,
1326 assignNew(mce,ty, unop(opNOT, qbb))));
1327
1328 // a_max = aa | qaa
1329 a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1330
1331 // b_max = bb | qbb
1332 b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1333
1334 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1335 return
1336 assignNew(mce,ty,
1337 binop( opOR,
1338 assignNew(mce,ty, binop(opOR, qaa, qbb)),
1339 assignNew(mce,ty,
1340 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1341 assignNew(mce,ty, binop(opADD, a_max, b_max))
1342 )
1343 )
1344 )
1345 );
1346 }
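/* Worked example (illustrative values): aa = 0x6, qaa = 0x2 (bit 1
   of aa undefined), bb = 0x3, qbb = 0x0.  Then a_min = 0x4,
   a_max = 0x6, b_min = b_max = 0x3, the two sums are 0x7 and 0x9,
   and their XOR is 0xE.  OR-ing in (qaa | qbb) = 0x2 still gives
   0xE: bits 1..3 are undefined (a carry out of the undefined bit can
   reach bit 3), while bit 0 and bits 4 upward remain defined.  That
   is considerably more precise than mkLeft32(mkUifU32(qaa, qbb)),
   which would mark every bit from bit 1 upward as undefined. */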
1347
1348
1349 /*------------------------------------------------------------*/
1350 /*--- Helpers for dealing with vector primops. ---*/
1351 /*------------------------------------------------------------*/
1352
1353 /* Vector pessimisation -- pessimise within each lane individually. */
1354
1355 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1356 {
1357 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1358 }
1359
1360 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1361 {
1362 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1363 }
1364
1365 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1366 {
1367 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1368 }
1369
1370 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1371 {
1372 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1373 }
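/* For example: mkPCast32x4 applied to a V128 shadow whose 32-bit
   lanes are {0x00000000, 0x00000001, 0x80000000, 0x00000000} gives
   {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}: any lane with an
   undefined bit becomes wholly undefined, but fully defined lanes
   stay defined, so undefinedness does not leak across lanes. */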
1374
1375
1376 /* Here's a simple scheme capable of handling ops derived from SSE1
1377 code while only generating ops that can be efficiently
1378 implemented in SSE1. */
1379
1380 /* All-lanes versions are straightforward:
1381
1382 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1383
1384 unary32Fx4(x) ==> PCast32x4(x#)
1385
1386 Lowest-lane-only versions are more complex:
1387
1388 binary32F0x4(x,y) ==> SetV128lo32(
1389 x#,
1390 PCast32(V128to32(UifUV128(x#,y#)))
1391 )
1392
1393 This is perhaps not so obvious. In particular, it's faster to
1394 do a V128-bit UifU and then take the bottom 32 bits than the more
1395 obvious scheme of taking the bottom 32 bits of each operand
1396 and doing a 32-bit UifU. Basically since UifU is fast and
1397 chopping lanes off vector values is slow.
1398
1399 Finally:
1400
1401 unary32F0x4(x) ==> SetV128lo32(
1402 x#,
1403 PCast32(V128to32(x#))
1404 )
1405
1406 Where:
1407
1408 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1409 PCast32x4(v#) = CmpNEZ32x4(v#)
1410 */
1411
1412 static
1413 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1414 {
1415 IRAtom* at;
1416 tl_assert(isShadowAtom(mce, vatomX));
1417 tl_assert(isShadowAtom(mce, vatomY));
1418 at = mkUifUV128(mce, vatomX, vatomY);
1419 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1420 return at;
1421 }
1422
1423 static
1424 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1425 {
1426 IRAtom* at;
1427 tl_assert(isShadowAtom(mce, vatomX));
1428 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1429 return at;
1430 }
1431
1432 static
1433 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1434 {
1435 IRAtom* at;
1436 tl_assert(isShadowAtom(mce, vatomX));
1437 tl_assert(isShadowAtom(mce, vatomY));
1438 at = mkUifUV128(mce, vatomX, vatomY);
1439 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1440 at = mkPCastTo(mce, Ity_I32, at);
1441 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1442 return at;
1443 }
1444
1445 static
1446 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1447 {
1448 IRAtom* at;
1449 tl_assert(isShadowAtom(mce, vatomX));
1450 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1451 at = mkPCastTo(mce, Ity_I32, at);
1452 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1453 return at;
1454 }
1455
1456 /* --- ... and ... 64Fx2 versions of the same ... --- */
1457
1458 static
1459 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1460 {
1461 IRAtom* at;
1462 tl_assert(isShadowAtom(mce, vatomX));
1463 tl_assert(isShadowAtom(mce, vatomY));
1464 at = mkUifUV128(mce, vatomX, vatomY);
1465 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1466 return at;
1467 }
1468
1469 static
1470 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1471 {
1472 IRAtom* at;
1473 tl_assert(isShadowAtom(mce, vatomX));
1474 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1475 return at;
1476 }
1477
1478 static
1479 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1480 {
1481 IRAtom* at;
1482 tl_assert(isShadowAtom(mce, vatomX));
1483 tl_assert(isShadowAtom(mce, vatomY));
1484 at = mkUifUV128(mce, vatomX, vatomY);
1485 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1486 at = mkPCastTo(mce, Ity_I64, at);
1487 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1488 return at;
1489 }
1490
1491 static
1492 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1493 {
1494 IRAtom* at;
1495 tl_assert(isShadowAtom(mce, vatomX));
1496 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1497 at = mkPCastTo(mce, Ity_I64, at);
1498 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1499 return at;
1500 }
1501
1502 /* --- --- Vector saturated narrowing --- --- */
1503
1504 /* This is quite subtle. What to do is simple:
1505
1506 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1507
1508 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1509
1510 Why this is right is not so simple. Consider a lane in the args,
1511 vatom1 or 2, doesn't matter.
1512
1513 After the PCast, that lane is all 0s (defined) or all
1514 1s(undefined).
1515
1516 Both signed and unsigned saturating narrowing of all 0s produces
1517 all 0s, which is what we want.
1518
1519 The all-1s case is more complex. Unsigned narrowing interprets an
1520 all-1s input as the largest unsigned integer, and so produces all
1521 1s as a result since that is the largest unsigned value at the
1522 smaller width.
1523
1524 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1525 to -1, so we still wind up with all 1s at the smaller width.
1526
1527 So: In short, pessimise the args, then apply the original narrowing
1528 op.
1529 */
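/* For example, with Iop_QNarrowBin16Sto8Sx16: a 16-bit shadow lane
   with any bit set pcasts to 0xFFFF, which as a signed value is -1
   and narrows to the 8-bit value 0xFF -- still all 1s.  A defined
   lane (0x0000) narrows to 0x00.  So applying the original narrowing
   op to the pcast-ed shadows yields correct V bits in every output
   lane. */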
1530 static
1531 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1532 IRAtom* vatom1, IRAtom* vatom2)
1533 {
1534 IRAtom *at1, *at2, *at3;
1535 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1536 switch (narrow_op) {
1537 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1538 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
1539 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
1540 default: VG_(tool_panic)("vectorNarrowV128");
1541 }
1542 tl_assert(isShadowAtom(mce,vatom1));
1543 tl_assert(isShadowAtom(mce,vatom2));
1544 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1545 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1546 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1547 return at3;
1548 }
1549
1550
1551 /* --- --- Vector integer arithmetic --- --- */
1552
1553 /* Simple ... UifU the args and per-lane pessimise the results. */
1554 static
1555 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1556 {
1557 IRAtom* at;
1558 at = mkUifUV128(mce, vatom1, vatom2);
1559 at = mkPCast8x16(mce, at);
1560 return at;
1561 }
1562
1563 static
1564 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1565 {
1566 IRAtom* at;
1567 at = mkUifUV128(mce, vatom1, vatom2);
1568 at = mkPCast16x8(mce, at);
1569 return at;
1570 }
1571
1572 static
1573 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1574 {
1575 IRAtom* at;
1576 at = mkUifUV128(mce, vatom1, vatom2);
1577 at = mkPCast32x4(mce, at);
1578 return at;
1579 }
1580
1581 static
1582 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1583 {
1584 IRAtom* at;
1585 at = mkUifUV128(mce, vatom1, vatom2);
1586 at = mkPCast64x2(mce, at);
1587 return at;
1588 }
1589
1590
1591 /*------------------------------------------------------------*/
1592 /*--- Generate shadow values from all kinds of IRExprs. ---*/
1593 /*------------------------------------------------------------*/
1594
1595 static
1596 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1597 IROp op,
1598 IRAtom* atom1, IRAtom* atom2 )
1599 {
1600 IRType and_or_ty;
1601 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1602 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1603 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1604
1605 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1606 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1607
1608 tl_assert(isOriginalAtom(mce,atom1));
1609 tl_assert(isOriginalAtom(mce,atom2));
1610 tl_assert(isShadowAtom(mce,vatom1));
1611 tl_assert(isShadowAtom(mce,vatom2));
1612 tl_assert(sameKindedAtoms(atom1,vatom1));
1613 tl_assert(sameKindedAtoms(atom2,vatom2));
1614 switch (op) {
1615
1616 /* V128-bit SIMD (SSE2-esque) */
1617
1618 case Iop_ShrN16x8:
1619 case Iop_ShrN32x4:
1620 case Iop_ShrN64x2:
1621 case Iop_SarN16x8:
1622 case Iop_SarN32x4:
1623 case Iop_ShlN16x8:
1624 case Iop_ShlN32x4:
1625 case Iop_ShlN64x2:
1626 /* Same scheme as with all other shifts. */
1627 complainIfUndefined(mce, atom2);
1628 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1629
1630 case Iop_QSub8Ux16:
1631 case Iop_QSub8Sx16:
1632 case Iop_Sub8x16:
1633 case Iop_Min8Ux16:
1634 case Iop_Max8Ux16:
1635 case Iop_CmpGT8Sx16:
1636 case Iop_CmpEQ8x16:
1637 case Iop_Avg8Ux16:
1638 case Iop_QAdd8Ux16:
1639 case Iop_QAdd8Sx16:
1640 case Iop_Add8x16:
1641 return binary8Ix16(mce, vatom1, vatom2);
1642
1643 case Iop_QSub16Ux8:
1644 case Iop_QSub16Sx8:
1645 case Iop_Sub16x8:
1646 case Iop_Mul16x8:
1647 case Iop_MulHi16Sx8:
1648 case Iop_MulHi16Ux8:
1649 case Iop_Min16Sx8:
1650 case Iop_Max16Sx8:
1651 case Iop_CmpGT16Sx8:
1652 case Iop_CmpEQ16x8:
1653 case Iop_Avg16Ux8:
1654 case Iop_QAdd16Ux8:
1655 case Iop_QAdd16Sx8:
1656 case Iop_Add16x8:
1657 return binary16Ix8(mce, vatom1, vatom2);
1658
1659 case Iop_Sub32x4:
1660 case Iop_QSub32Sx4:
1661 case Iop_QSub32Ux4:
1662 case Iop_CmpGT32Sx4:
1663 case Iop_CmpEQ32x4:
1664 case Iop_Add32x4:
1665 case Iop_QAdd32Ux4:
1666 case Iop_QAdd32Sx4:
1667 return binary32Ix4(mce, vatom1, vatom2);
1668
1669 case Iop_Sub64x2:
1670 case Iop_QSub64Ux2:
1671 case Iop_QSub64Sx2:
1672 case Iop_Add64x2:
1673 case Iop_QAdd64Ux2:
1674 case Iop_QAdd64Sx2:
1675 return binary64Ix2(mce, vatom1, vatom2);
1676
1677 case Iop_QNarrowBin32Sto16Sx8:
1678 case Iop_QNarrowBin16Sto8Sx16:
1679 case Iop_QNarrowBin16Sto8Ux16:
1680 return vectorNarrowV128(mce, op, vatom1, vatom2);
1681
1682 case Iop_Sub64Fx2:
1683 case Iop_Mul64Fx2:
1684 case Iop_Min64Fx2:
1685 case Iop_Max64Fx2:
1686 case Iop_Div64Fx2:
1687 case Iop_CmpLT64Fx2:
1688 case Iop_CmpLE64Fx2:
1689 case Iop_CmpEQ64Fx2:
1690 case Iop_Add64Fx2:
1691 return binary64Fx2(mce, vatom1, vatom2);
1692
1693 case Iop_Sub64F0x2:
1694 case Iop_Mul64F0x2:
1695 case Iop_Min64F0x2:
1696 case Iop_Max64F0x2:
1697 case Iop_Div64F0x2:
1698 case Iop_CmpLT64F0x2:
1699 case Iop_CmpLE64F0x2:
1700 case Iop_CmpEQ64F0x2:
1701 case Iop_Add64F0x2:
1702 return binary64F0x2(mce, vatom1, vatom2);
1703
1704 /* V128-bit SIMD (SSE1-esque) */
1705
1706 case Iop_Sub32Fx4:
1707 case Iop_Mul32Fx4:
1708 case Iop_Min32Fx4:
1709 case Iop_Max32Fx4:
1710 case Iop_Div32Fx4:
1711 case Iop_CmpLT32Fx4:
1712 case Iop_CmpLE32Fx4:
1713 case Iop_CmpEQ32Fx4:
1714 case Iop_Add32Fx4:
1715 return binary32Fx4(mce, vatom1, vatom2);
1716
1717 case Iop_Sub32F0x4:
1718 case Iop_Mul32F0x4:
1719 case Iop_Min32F0x4:
1720 case Iop_Max32F0x4:
1721 case Iop_Div32F0x4:
1722 case Iop_CmpLT32F0x4:
1723 case Iop_CmpLE32F0x4:
1724 case Iop_CmpEQ32F0x4:
1725 case Iop_Add32F0x4:
1726 return binary32F0x4(mce, vatom1, vatom2);
1727
1728 /* V128-bit data-steering */
1729 case Iop_SetV128lo32:
1730 case Iop_SetV128lo64:
1731 case Iop_64HLtoV128:
1732 case Iop_InterleaveLO64x2:
1733 case Iop_InterleaveLO32x4:
1734 case Iop_InterleaveLO16x8:
1735 case Iop_InterleaveLO8x16:
1736 case Iop_InterleaveHI64x2:
1737 case Iop_InterleaveHI32x4:
1738 case Iop_InterleaveHI16x8:
1739 case Iop_InterleaveHI8x16:
1740 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1741
1742 /* Scalar floating point */
1743
1744 // case Iop_RoundF64:
1745 case Iop_F64toI64S:
1746 case Iop_I64StoF64:
1747 /* First arg is I32 (rounding mode), second is F64 or I64
1748 (data). */
1749 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1750
1751 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1752 /* Takes two F64 args. */
1753 case Iop_F64toI32S:
1754 case Iop_F64toF32:
1755 /* First arg is I32 (rounding mode), second is F64 (data). */
1756 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1757
1758 case Iop_F64toI16S:
1759 /* First arg is I32 (rounding mode), second is F64 (data). */
1760 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1761
1762 case Iop_ScaleF64:
1763 case Iop_Yl2xF64:
1764 case Iop_Yl2xp1F64:
1765 case Iop_PRemF64:
1766 case Iop_AtanF64:
1767 case Iop_AddF64:
1768 case Iop_DivF64:
1769 case Iop_SubF64:
1770 case Iop_MulF64:
1771 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1772
1773 case Iop_CmpF64:
1774 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1775
1776 /* non-FP after here */
1777
1778 case Iop_DivModU64to32:
1779 case Iop_DivModS64to32:
1780 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1781
1782 case Iop_16HLto32:
1783 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1784 case Iop_32HLto64:
1785 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1786
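/* Widening multiplies: form the UifU of the argument shadows,
   left-propagate it (mkLeft*) to get the shadow of the low half,
   and pessimise that (mkPCastTo) to get the shadow of the high
   half; the two halves are then glued back together. */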
1787 case Iop_MullS32:
1788 case Iop_MullU32: {
1789 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1790 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1791 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1792 }
1793
1794 case Iop_MullS16:
1795 case Iop_MullU16: {
1796 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1797 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1798 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1799 }
1800
1801 case Iop_MullS8:
1802 case Iop_MullU8: {
1803 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1804 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1805 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1806 }
1807
1808 case Iop_Add32:
1809 # if 0
1810 return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1811 # endif
1812 case Iop_Sub32:
1813 case Iop_Mul32:
1814 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1815
1816 case Iop_Mul16:
1817 case Iop_Add16:
1818 case Iop_Sub16:
1819 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1820
1821 case Iop_Sub8:
1822 case Iop_Add8:
1823 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1824
1825 case Iop_CmpLE32S: case Iop_CmpLE32U:
1826 case Iop_CmpLT32U: case Iop_CmpLT32S:
1827 case Iop_CmpEQ32: case Iop_CmpNE32:
1828 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1829
1830 case Iop_CmpEQ16: case Iop_CmpNE16:
1831 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1832
1833 case Iop_CmpEQ8: case Iop_CmpNE8:
1834 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1835
1836 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1837 /* Complain if the shift amount is undefined. Then simply
1838 shift the first arg's V bits by the real shift amount. */
1839 complainIfUndefined(mce, atom2);
1840 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1841
1842 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1843 /* Same scheme as with 32-bit shifts. */
1844 complainIfUndefined(mce, atom2);
1845 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1846
1847 case Iop_Shl8: case Iop_Shr8:
1848 /* Same scheme as with 32-bit shifts. */
1849 complainIfUndefined(mce, atom2);
1850 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1851
1852 case Iop_Shl64: case Iop_Shr64:
1853 /* Same scheme as with 32-bit shifts. */
1854 complainIfUndefined(mce, atom2);
1855 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1856
1857 case Iop_AndV128:
1858 uifu = mkUifUV128; difd = mkDifDV128;
1859 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1860 case Iop_And64:
1861 uifu = mkUifU64; difd = mkDifD64;
1862 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1863 case Iop_And32:
1864 uifu = mkUifU32; difd = mkDifD32;
1865 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1866 case Iop_And16:
1867 uifu = mkUifU16; difd = mkDifD16;
1868 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1869 case Iop_And8:
1870 uifu = mkUifU8; difd = mkDifD8;
1871 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1872
1873 case Iop_OrV128:
1874 uifu = mkUifUV128; difd = mkDifDV128;
1875 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1876 case Iop_Or64:
1877 uifu = mkUifU64; difd = mkDifD64;
1878 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1879 case Iop_Or32:
1880 uifu = mkUifU32; difd = mkDifD32;
1881 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1882 case Iop_Or16:
1883 uifu = mkUifU16; difd = mkDifD16;
1884 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1885 case Iop_Or8:
1886 uifu = mkUifU8; difd = mkDifD8;
1887 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1888
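/* And/Or: the expression built below is

      DifD( UifU(v1,v2),
            DifD( improve(a1,v1), improve(a2,v2) ) )

   that is, a result bit is regarded as defined if both argument
   bits are defined there, or if either argument alone forces the
   result at that position (a defined 0 for AND, a defined 1 for
   OR) -- which is what the 'improve' terms, defined earlier in
   this file, compute.

   Worked example (8-bit AND, with the usual encoding where a V bit
   of 1 means undefined, and assuming improve-for-AND is
   data|vbits): a1 = 0x0F fully defined (v1 = 0x00), a2 = junk
   (v2 = 0xFF).  UifU gives 0xFF, the improve terms give 0x0F and
   0xFF, so the final shadow is 0xFF & (0x0F & 0xFF) = 0x0F: the
   top nibble of the result is defined, because a1's defined zero
   bits force it to zero. */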
1889 do_And_Or:
1890 return
1891 assignNew(
1892 mce,
1893 and_or_ty,
1894 difd(mce, uifu(mce, vatom1, vatom2),
1895 difd(mce, improve(mce, atom1, vatom1),
1896 improve(mce, atom2, vatom2) ) ) );
1897
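/* Xor: each result bit depends only on the corresponding argument
   bits, and is defined exactly when both of them are, so plain
   UifU of the shadows is already precise -- no pessimisation
   needed. */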
1898 case Iop_Xor8:
1899 return mkUifU8(mce, vatom1, vatom2);
1900 case Iop_Xor16:
1901 return mkUifU16(mce, vatom1, vatom2);
1902 case Iop_Xor32:
1903 return mkUifU32(mce, vatom1, vatom2);
1904 case Iop_Xor64:
1905 return mkUifU64(mce, vatom1, vatom2);
1906 case Iop_XorV128:
1907 return mkUifUV128(mce, vatom1, vatom2);
1908
1909 default:
1910 ppIROp(op);
1911 VG_(tool_panic)("memcheck:expr2vbits_Binop");
1912 }
1913 }
1914
1915
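/* Produce the shadow expression for a unary operation.  Widening,
   narrowing and other data-steering ops apply the same op to the
   V bits; reinterprets and Nots pass the shadow through unchanged;
   FP ops and Clz/Ctz are approximated lazily by pessimising the
   argument's shadow to the result type. */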
1916 static
1917 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1918 {
1919 IRAtom* vatom = expr2vbits( mce, atom );
1920 tl_assert(isOriginalAtom(mce,atom));
1921 switch (op) {
1922
1923 case Iop_Sqrt64Fx2:
1924 return unary64Fx2(mce, vatom);
1925
1926 case Iop_Sqrt64F0x2:
1927 return unary64F0x2(mce, vatom);
1928
1929 case Iop_Sqrt32Fx4:
1930 case Iop_RSqrt32Fx4:
1931 case Iop_Recip32Fx4:
1932 return unary32Fx4(mce, vatom);
1933
1934 case Iop_Sqrt32F0x4:
1935 case Iop_RSqrt32F0x4:
1936 case Iop_Recip32F0x4:
1937 return unary32F0x4(mce, vatom);
1938
1939 case Iop_32UtoV128:
1940 case Iop_64UtoV128:
1941 return assignNew(mce, Ity_V128, unop(op, vatom));
1942
1943 case Iop_F32toF64:
1944 case Iop_I32StoF64:
1945 case Iop_NegF64:
1946 case Iop_SinF64:
1947 case Iop_CosF64:
1948 case Iop_TanF64:
1949 case Iop_SqrtF64:
1950 case Iop_AbsF64:
1951 case Iop_2xm1F64:
1952 return mkPCastTo(mce, Ity_I64, vatom);
1953
1954 case Iop_Clz32:
1955 case Iop_Ctz32:
1956 return mkPCastTo(mce, Ity_I32, vatom);
1957
1958 case Iop_32Sto64:
1959 case Iop_32Uto64:
1960 case Iop_V128to64:
1961 case Iop_V128HIto64:
1962 return assignNew(mce, Ity_I64, unop(op, vatom));
1963
1964 case Iop_64to32:
1965 case Iop_64HIto32:
1966 case Iop_1Uto32:
1967 case Iop_8Uto32:
1968 case Iop_16Uto32:
1969 case Iop_16Sto32:
1970 case Iop_8Sto32:
1971 return assignNew(mce, Ity_I32, unop(op, vatom));
1972
1973 case Iop_8Sto16:
1974 case Iop_8Uto16:
1975 case Iop_32to16:
1976 case Iop_32HIto16:
1977 return assignNew(mce, Ity_I16, unop(op, vatom));
1978
1979 case Iop_1Uto8:
1980 case Iop_16to8:
1981 case Iop_32to8:
1982 return assignNew(mce, Ity_I8, unop(op, vatom));
1983
1984 case Iop_32to1:
1985 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1986
1987 case Iop_ReinterpF64asI64:
1988 case Iop_ReinterpI64asF64:
1989 case Iop_ReinterpI32asF32:
1990 case Iop_NotV128:
1991 case Iop_Not64:
1992 case Iop_Not32:
1993 case Iop_Not16:
1994 case Iop_Not8:
1995 case Iop_Not1:
1996 return vatom;
1997
1998 default:
1999 ppIROp(op);
2000 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2001 }
2002 }
2003
2004
2005 /* Worker function; do not call directly. */
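/* It emits a definedness check on the address, then a dirty call
   to the MC_(helperc_LOADV*) helper of the right width, which
   fetches the V bits for the (possibly biased) address from shadow
   memory into a fresh temporary. */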
2006 static
2007 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2008 {
2009 void* helper;
2010 HChar* hname;
2011 IRDirty* di;
2012 IRTemp datavbits;
2013 IRAtom* addrAct;
2014
2015 tl_assert(isOriginalAtom(mce,addr));
2016
2017 /* First, emit a definedness test for the address. This also sets
2018 the address (shadow) to 'defined' following the test. */
2019 complainIfUndefined( mce, addr );
2020
2021 /* Now cook up a call to the relevant helper function, to read the
2022 data V bits from shadow memory. */
2023 ty = shadowType(ty);
2024 switch (ty) {
2025 case Ity_I64: helper = &MC_(helperc_LOADV8);
2026 hname = "MC_(helperc_LOADV8)";
2027 break;
2028 case Ity_I32: helper = &MC_(helperc_LOADV4);
2029 hname = "MC_(helperc_LOADV4)";
2030 break;
2031 case Ity_I16: helper = &MC_(helperc_LOADV2);
2032 hname = "MC_(helperc_LOADV2)";
2033 break;
2034 case Ity_I8: helper = &MC_(helperc_LOADV1);
2035 hname = "MC_(helperc_LOADV1)";
2036 break;
2037 default: ppIRType(ty);
2038 VG_(tool_panic)("memcheck:do_shadow_LDle");
2039 }
2040
2041 /* Generate the actual address into addrAct. */
2042 if (bias == 0) {
2043 addrAct = addr;
2044 } else {
2045 IROp mkAdd;
2046 IRAtom* eBias;
2047 IRType tyAddr = mce->hWordTy;
2048 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2049 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2050 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2051 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2052 }
2053
2054 /* We need to have a place to park the V bits we're just about to
2055 read. */
2056 datavbits = newIRTemp(mce->bb->tyenv, ty);
2057 di = unsafeIRDirty_1_N( datavbits,
2058 1/*regparms*/, hname, helper,
2059 mkIRExprVec_1( addrAct ));
2060 setHelperAnns( mce, di );
2061 stmt( mce->bb, IRStmt_Dirty(di) );
2062
2063 return mkexpr(datavbits);
2064 }
2065
2066
2067 static
2068 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2069 {
2070 IRAtom *v64hi, *v64lo;
2071 switch (shadowType(ty)) {
2072 case Ity_I8:
2073 case Ity_I16:
2074 case Ity_I32:
2075 case Ity_I64:
2076 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2077 case Ity_V128:
2078 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2079 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2080 return assignNew( mce,
2081 Ity_V128,
2082 binop(Iop_64HLtoV128, v64hi, v64lo));
2083 default:
2084 VG_(tool_panic)("expr2vbits_LDle");
2085 }
2086 }
2087
2088
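/* Generate an expression for the V bits of a little-endian load.
   Integer widths go straight to the worker; a V128 load is split
   into two 64-bit shadow loads (at bias and bias+8) whose results
   are glued back together with Iop_64HLtoV128. */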
2089 static
2090 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2091 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2092 {
2093 IRAtom *vbitsC, *vbits0, *vbitsX;
2094 IRType ty;
2095 /* Given Mux0X(cond,expr0,exprX), generate
2096 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2097 That is, steer the V bits like the originals, but trash the
2098 result if the steering value is undefined. This gives
2099 lazy propagation. */
2100 tl_assert(isOriginalAtom(mce, cond));
2101 tl_assert(isOriginalAtom(mce, expr0));
2102 tl_assert(isOriginalAtom(mce, exprX));
2103
2104 vbitsC = expr2vbits(mce, cond);
2105 vbits0 = expr2vbits(mce, expr0);
2106 vbitsX = expr2vbits(mce, exprX);
2107 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2108
2109 return
2110 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2111 mkPCastTo(mce, ty, vbitsC) );
2112 }
2113
2114 /* --------- This is the main expression-handling function. --------- */
2115
2116 static
2117 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2118 {
2119 switch (e->tag) {
2120
2121 case Iex_Get:
2122 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2123
2124 case Iex_GetI:
2125 return shadow_GETI( mce, e->Iex.GetI.descr,
2126 e->Iex.GetI.ix, e->Iex.GetI.bias );
2127
2128 case Iex_RdTmp:
2129 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2130
2131 case Iex_Const:
2132 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2133
2134 case Iex_Binop:
2135 return expr2vbits_Binop(
2136 mce,
2137 e->Iex.Binop.op,
2138 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2139 );
2140
2141 case Iex_Unop:
2142 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2143
2144 case Iex_Load:
2145 return expr2vbits_LDle( mce, e->Iex.Load.ty,
2146 e->Iex.Load.addr, 0/*addr bias*/ );
2147
2148 case Iex_CCall:
2149 return mkLazyN( mce, e->Iex.CCall.args,
2150 e->Iex.CCall.retty,
2151 e->Iex.CCall.cee );
2152
2153 case Iex_Mux0X:
2154 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2155 e->Iex.Mux0X.exprX);
2156
2157 default:
2158 VG_(printf)("\n");
2159 ppIRExpr(e);
2160 VG_(printf)("\n");
2161 VG_(tool_panic)("memcheck: expr2vbits");
2162 }
2163 }
2164
2165 /*------------------------------------------------------------*/
2166 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2167 /*------------------------------------------------------------*/
2168
2169 /* Zero-widen a V-bit value to the host word size, so it can be passed as a word-sized argument to the store helpers. */
2170
2171 static
2172 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2173 {
2174 IRType ty, tyH;
2175
2176 /* vatom is a vbits-value and as such can only have a shadow type. */
2177 tl_assert(isShadowAtom(mce,vatom));
2178
2179 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2180 tyH = mce->hWordTy;
2181
2182 if (tyH == Ity_I32) {
2183 switch (ty) {
2184 case Ity_I32: return vatom;
2185 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2186 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2187 default: goto unhandled;
2188 }
2189 } else {
2190 goto unhandled;
2191 }
2192 unhandled:
2193 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2194 VG_(tool_panic)("zwidenToHostWord");
2195 }
2196
2197
2198 /* Generate a shadow store. addr is always the original address atom.
2199 You can pass in either originals or V-bits for the data atom, but
2200 obviously not both. */
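   The scheme: complain if the address is undefined, then emit a
   dirty call to the MC_(helperc_STOREV*) helper of the right width
   to write the data's V bits into shadow memory.  A V128 store is
   split into two 64-bit helper calls, at bias and bias+8; 64-bit
   data uses regparm 1 rather than 2, because the 32-bit back ends
   cannot pass 64-bit regparm arguments. */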
2201
2202 static
2203 void do_shadow_STle ( MCEnv* mce,
2204 IRAtom* addr, UInt bias,
2205 IRAtom* data, IRAtom* vdata )
2206 {
2207 IROp mkAdd;
2208 IRType ty, tyAddr;
2209 IRDirty *di, *diLo64, *diHi64;
2210 IRAtom *addrAct, *addrLo64, *addrHi64;
2211 IRAtom *vdataLo64, *vdataHi64;
2212 IRAtom *eBias, *eBias0, *eBias8;
2213 void* helper = NULL;
2214 HChar* hname = NULL;
2215
2216 tyAddr = mce->hWordTy;
2217 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2218 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2219
2220 di = diLo64 = diHi64 = NULL;
2221 eBias = eBias0 = eBias8 = NULL;
2222 addrAct = addrLo64 = addrHi64 = NULL;
2223 vdataLo64 = vdataHi64 = NULL;
2224
2225 if (data) {
2226 tl_assert(!vdata);
2227 tl_assert(isOriginalAtom(mce, data));
2228 tl_assert(bias == 0);
2229 vdata = expr2vbits( mce, data );
2230 } else {
2231 tl_assert(vdata);
2232 }
2233
2234 tl_assert(isOriginalAtom(mce,addr));
2235 tl_assert(isShadowAtom(mce,vdata));
2236
2237 ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2238
2239 /* First, emit a definedness test for the address. This also sets
2240 the address (shadow) to 'defined' following the test. */
2241 complainIfUndefined( mce, addr );
2242
2243 /* Now decide which helper function to call to write the data V
2244 bits into shadow memory. */
2245 switch (ty) {
2246 case Ity_V128: /* we'll use the helper twice */
2247 case Ity_I64: helper = &MC_(helperc_STOREV8);
2248 hname = "MC_(helperc_STOREV8)";
2249 break;
2250 case Ity_I32: helper = &MC_(helperc_STOREV4);
2251 hname = "MC_(helperc_STOREV4)";
2252 break;
2253 case Ity_I16: helper = &MC_(helperc_STOREV2);
2254 hname = "MC_(helperc_STOREV2)";
2255 break;
2256 case Ity_I8: helper = &MC_(helperc_STOREV1);
2257 hname = "MC_(helperc_STOREV1)";
2258 break;
2259 default: VG_(tool_panic)("memcheck:do_shadow_STle");
2260 }
2261
2262 if (ty == Ity_V128) {
2263
2264 /* V128-bit case */
2265 /* See comment in next clause re 64-bit regparms */
2266 eBias0 = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2267 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2268 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2269 diLo64 = unsafeIRDirty_0_N(
2270 1/*regparms*/, hname, helper,
2271 mkIRExprVec_2( addrLo64, vdataLo64 ));
2272
2273 eBias8 = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2274 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2275 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2276 diHi64 = unsafeIRDirty_0_N(
2277 1/*regparms*/, hname, helper,
2278 mkIRExprVec_2( addrHi64, vdataHi64 ));
2279
2280 setHelperAnns( mce, diLo64 );
2281 setHelperAnns( mce, diHi64 );
2282 stmt( mce->bb, IRStmt_Dirty(diLo64) );
2283 stmt( mce->bb, IRStmt_Dirty(diHi64) );
2284
2285 } else {
2286
2287 /* 8/16/32/64-bit cases */
2288 /* Generate the actual address into addrAct. */
2289 if (bias == 0) {
2290 addrAct = addr;
2291 } else {
2292 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2293 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2294 }
2295
2296 if (ty == Ity_I64) {
2297 /* We can't do this with regparm 2 on 32-bit platforms, since
2298 the back ends aren't clever enough to handle 64-bit
2299 regparm args. Therefore be different. */
2300 di = unsafeIRDirty_0_N(
2301 1/*regparms*/, hname, helper,
2302 mkIRExprVec_2( addrAct, vdata ));
2303 } else {
2304 di = unsafeIRDirty_0_N(
2305 2/*regparms*/, hname, helper,
2306 mkIRExprVec_2( addrAct,
2307 zwidenToHostWord( mce, vdata )));
2308 }
2309 setHelperAnns( mce, di );
2310 stmt( mce->bb, IRStmt_Dirty(di) );
2311 }
2312
2313 }
2314
2315
2316 /* Do lazy pessimistic propagation through a dirty helper call, by
2317 looking at the annotations on it. This is the most complex part of
2318 Memcheck. */
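/* The scheme: gather all the inputs the helper may consult -- its
   non-masked arguments, any guest state it reads, and any memory
   it reads -- pessimise each to 32 bits and UifU them into a
   single summary value 'curr'.  Then pessimistically cast 'curr'
   back out to every output: the destination temporary, any guest
   state written, and any memory written. */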
2319
2320 static IRType szToITy ( Int n )
2321 {
2322 switch (n) {
2323 case 1: return Ity_I8;
2324 case 2: return Ity_I16;
2325 case 4: return Ity_I32;
2326 case 8: return Ity_I64;
2327 default: VG_(tool_panic)("szToITy(memcheck)");
2328 }
2329 }
2330
2331 static
2332 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2333 {
2334 Int i, n, offset, toDo, gSz, gOff;
2335 IRAtom *src, *here, *curr;
2336 IRType tyAddr, tySrc, tyDst;
2337 IRTemp dst;
2338
2339 /* First check the guard. */
2340 complainIfUndefined(mce, d->guard);
2341
2342 /* Now round up all inputs and PCast over them. */
2343 curr = definedOfType(Ity_I32);
2344
2345 /* Inputs: unmasked args */
2346 for (i = 0; d->args[i]; i++) {
2347 if (d->cee->mcx_mask & (1<<i)) {
2348 /* ignore this arg */
2349 } else {
2350 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2351 curr = mkUifU32(mce, here, curr);
2352 }
2353 }
2354
2355 /* Inputs: guest state that we read. */
2356 for (i = 0; i < d->nFxState; i++) {
2357 tl_assert(d->fxState[i].fx != Ifx_None);
2358 if (d->fxState[i].fx == Ifx_Write)
2359 continue;
2360
2361 /* Ignore any sections marked as 'always defined'. */
2362 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2363 if (0)
2364 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2365 d->fxState[i].offset, d->fxState[i].size );
2366 continue;
2367 }
2368
2369 /* This state element is read or modified. So we need to
2370 consider it. If larger than 8 bytes, deal with it in 8-byte
2371 chunks. */
2372 gSz = d->fxState[i].size;
2373 gOff = d->fxState[i].offset;
2374 tl_assert(gSz > 0);
2375 while (True) {
2376 if (gSz == 0) break;
2377 n = gSz <= 8 ? gSz : 8;
2378 /* update 'curr' with UifU of the state slice
2379 gOff .. gOff+n-1 */
2380 tySrc = szToITy( n );
2381 src = assignNew( mce, tySrc,
2382 shadow_GET(mce, gOff, tySrc ) );
2383 here = mkPCastTo( mce, Ity_I32, src );
2384 curr = mkUifU32(mce, here, curr);
2385 gSz -= n;
2386 gOff += n;
2387 }
2388
2389 }
2390
2391 /* Inputs: memory. First set up some info needed regardless of
2392 whether we're doing reads or writes. */
2393 tyAddr = Ity_INVALID;
2394
2395 if (d->mFx != Ifx_None) {
2396 /* Because we may do multiple shadow loads/stores from the same
2397 base address, it's best to do a single test of its
2398 definedness right now. Post-instrumentation optimisation
2399 should remove all but this test. */
2400 tl_assert(d->mAddr);
2401 complainIfUndefined(mce, d->mAddr);
2402
2403 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2404 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2405 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2406 }
2407
2408 /* Deal with memory inputs (reads or modifies) */
2409 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2410 offset = 0;
2411 toDo = d->mSize;
2412 /* chew off 32-bit chunks */
2413 while (toDo >= 4) {
2414 here = mkPCastTo(
2415 mce, Ity_I32,
2416 expr2vbits_LDle ( mce, Ity_I32,
2417 d->mAddr, d->mSize - toDo )
2418 );
2419 curr = mkUifU32(mce, here, curr);
2420 toDo -= 4;
2421 }
2422 /* chew off 16-bit chunks */
2423 while (toDo >= 2) {
2424 here = mkPCastTo(
2425 mce, Ity_I32,
2426 expr2vbits_LDle ( mce, Ity_I16,
2427 d->mAddr, d->mSize - toDo )
2428 );
2429 curr = mkUifU32(mce, here, curr);
2430 toDo -= 2;
2431 }
2432 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2433 }
2434
2435 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2436 all the inputs to the helper. Now we need to re-distribute the
2437 results to all destinations. */
2438
2439 /* Outputs: the destination temporary, if there is one. */
2440 if (d->tmp != IRTemp_INVALID) {
2441 dst = findShadowTmp(mce, d->tmp);
2442 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2443 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2444 }
2445
2446 /* Outputs: guest state that we write or modify. */
2447 for (i = 0; i < d->nFxState; i++) {
2448 tl_assert(d->fxState[i].fx != Ifx_None);
2449 if (d->fxState[i].fx == Ifx_Read)
2450 continue;
2451 /* Ignore any sections marked as 'always defined'. */
2452 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2453 continue;
2454 /* This state element is written or modified. So we need to
2455 consider it. If larger than 8 bytes, deal with it in 8-byte
2456 chunks. */
2457 gSz = d->fxState[i].size;
2458 gOff = d->fxState[i].offset;
2459 tl_assert(gSz > 0);
2460 while (True) {
2461 if (gSz == 0) break;
2462 n = gSz <= 8 ? gSz : 8;
2463 /* Write suitably-casted 'curr' to the state slice
2464 gOff .. gOff+n-1 */
2465 tyDst = szToITy( n );
2466 do_shadow_PUT( mce, gOff,
2467 NULL, /* original atom */
2468 mkPCastTo( mce, tyDst, curr ) );
2469 gSz -= n;
2470 gOff += n;
2471 }
2472 }
2473
2474 /* Outputs: memory that we write or modify. */
2475 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2476 offset = 0;
2477 toDo = d->mSize;
2478 /* chew off 32-bit chunks */
2479 while (toDo >= 4) {
2480 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2481 NULL, /* original data */
2482 mkPCastTo( mce, Ity_I32, curr ) );
2483 toDo -= 4;
2484 }
2485 /* chew off 16-bit chunks */
2486 while (toDo >= 2) {
2487 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2488 NULL, /* original data */
2489 mkPCastTo( mce, Ity_I16, curr ) );
2490 toDo -= 2;
2491 }
2492 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2493 }
2494
2495 }
2496
2497
2498 /*------------------------------------------------------------*/
2499 /*--- Memcheck main ---*/
2500 /*------------------------------------------------------------*/
2501
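/* Spot literal constants (0xFEFEFEFF, 0x80808080, ...) of the kind
   used by word-at-a-time string-scanning code, which deliberately
   reads partially-defined words.  The checkForBogusLiterals /
   hasBogusLiterals machinery (currently disabled -- see the
   commented-out uses in mc_instrument below) would use this to
   suppress definedness complaints on conditional exits in such
   blocks. */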
2502 static Bool isBogusAtom ( IRAtom* at )
2503 {
2504 ULong n = 0;
2505 IRConst* con;
2506 tl_assert(isIRAtom(at));
2507 if (at->tag == Iex_RdTmp)
2508 return False;
2509 tl_assert(at->tag == Iex_Const);
2510 con = at->Iex.Const.con;
2511 switch (con->tag) {
2512 case Ico_U8: n = (ULong)con->Ico.U8; break;
2513 case Ico_U16: n = (ULong)con->Ico.U16; break;
2514 case Ico_U32: n = (ULong)con->Ico.U32; break;
2515 case Ico_U64: n = (ULong)con->Ico.U64; break;
2516 default: ppIRExpr(at); tl_assert(0);
2517 }
2518 /* VG_(printf)("%llx\n", n); */
2519 return (n == 0xFEFEFEFF
2520 || n == 0x80808080
2521 || n == 0x1010101
2522 || n == 1010100);
2523 }
2524
2525 __attribute__((unused))
2526 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2527 {
2528 Int i;
2529 IRExpr* e;
2530 switch (st->tag) {
2531 case Ist_WrTmp:
2532 e = st->Ist.WrTmp.data;
2533 switch (e->tag) {
2534 case Iex_Get:
2535 case Iex_RdTmp:
2536 return False;
2537 case Iex_Unop:
2538 return isBogusAtom(e->Iex.Unop.arg);
2539 case Iex_Binop:
2540 return isBogusAtom(e->Iex.Binop.arg1)
2541 || isBogusAtom(e->Iex.Binop.arg2);
2542 case Iex_Mux0X:
2543 return isBogusAtom(e->Iex.Mux0X.cond)
2544 || isBogusAtom(e->Iex.Mux0X.expr0)
2545 || isBogusAtom(e->Iex.Mux0X.exprX);
2546 case Iex_Load:
2547 return isBogusAtom(e->Iex.Load.addr);
2548 case Iex_CCall:
2549 for (i = 0; e->Iex.CCall.args[i]; i++)
2550 if (isBogusAtom(e->Iex.CCall.args[i]))
2551 return True;
2552 return False;
2553 default:
2554 goto unhandled;
2555 }
2556 case Ist_Put:
2557 return isBogusAtom(st->Ist.Put.data);
2558 case Ist_Store:
2559 return isBogusAtom(st->Ist.Store.addr)
2560 || isBogusAtom(st->Ist.Store.data);
2561 case Ist_Exit:
2562 return isBogusAtom(st->Ist.Exit.guard);
2563 default:
2564 unhandled:
2565 ppIRStmt(st);
2566 VG_(tool_panic)("checkForBogusLiterals");
2567 }
2568 }
2569
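/* Top-level instrumentation routine.  It builds a fresh IRSB that
   mirrors bb_in: for each original statement it first emits the
   shadow statements computed above (shadow temp assignments, PUTs,
   stores, dirty-call handling, exit-guard checks) and then the
   original statement itself, and finally checks that the block's
   jump target is defined. */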
2570 IRSB* mc_instrument ( void* closureV,
2571 IRSB* bb_in, VexGuestLayout* layout,
2572 VexGuestExtents* vge,
2573 IRType gWordTy, IRType hWordTy )
2574 {
2575 Bool verboze = False; //True;
2576
2577 /* Bool hasBogusLiterals = False; */
2578
2579 Int i, j, first_stmt;
2580 IRStmt* st;
2581 MCEnv mce;
2582
2583 /* Set up BB */
2584 IRSB* bb = emptyIRSB();
2585 bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv);
2586 bb->next = deepCopyIRExpr(bb_in->next);
2587 bb->jumpkind = bb_in->jumpkind;
2588
2589 /* Set up the running environment. Only .bb is modified as we go
2590 along. */
2591 mce.bb = bb;
2592 mce.layout = layout;
2593 mce.n_originalTmps = bb->tyenv->types_used;
2594 mce.hWordTy = hWordTy;
2595 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2596 for (i = 0; i < mce.n_originalTmps; i++)
2597 mce.tmpMap[i] = IRTemp_INVALID;
2598
2599 /* Iterate over the stmts. */
2600
2601 for (i = 0; i < bb_in->stmts_used; i++) {
2602 st = bb_in->stmts[i];
2603 if (!st) continue;
2604
2605 tl_assert(isFlatIRStmt(st));
2606
2607 /*
2608 if (!hasBogusLiterals) {
2609 hasBogusLiterals = checkForBogusLiterals(st);
2610 if (hasBogusLiterals) {
2611 VG_(printf)("bogus: ");
2612 ppIRStmt(st);
2613 VG_(printf)("\n");
2614 }
2615 }
2616 */
2617 first_stmt = bb->stmts_used;
2618
2619 if (verboze) {
2620 ppIRStmt(st);
2621 VG_(printf)("\n\n");
2622 }
2623
2624 switch (st->tag) {
2625
2626 case Ist_WrTmp:
2627 assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2628 expr2vbits( &mce, st->Ist.WrTmp.data) );
2629 break;
2630
2631 case Ist_Put:
2632 do_shadow_PUT( &mce,
2633 st->Ist.Put.offset,
2634 st->Ist.Put.data,
2635 NULL /* shadow atom */ );
2636 break;
2637
2638 case Ist_PutI:
2639 do_shadow_PUTI( &mce,
2640 st->Ist.PutI.descr,
2641 st->Ist.PutI.ix,
2642 st->Ist.PutI.bias,
2643 st->Ist.PutI.data );
2644 break;
2645
2646 case Ist_Store:
2647 do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2648 st->Ist.Store.data,
2649 NULL /* shadow data */ );
2650 break;
2651
2652 case Ist_Exit:
2653 /* if (!hasBogusLiterals) */
2654 complainIfUndefined( &mce, st->Ist.Exit.guard );
2655 break;
2656
2657 case Ist_Dirty:
2658 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2659 break;
2660
2661 case Ist_IMark:
2662 case Ist_NoOp:
2663 break;
2664
2665 default:
2666 VG_(printf)("\n");
2667 ppIRStmt(st);
2668 VG_(printf)("\n");
2669 VG_(tool_panic)("memcheck: unhandled IRStmt");
2670
2671 } /* switch (st->tag) */
2672
2673 if (verboze) {
2674 for (j = first_stmt; j < bb->stmts_used; j++) {
2675 VG_(printf)(" ");
2676 ppIRStmt(bb->stmts[j]);
2677 VG_(printf)("\n");
2678 }
2679 VG_(printf)("\n");
2680 }
2681
2682 addStmtToIRSB(bb, st);
2683
2684 }
2685
2686 /* Now we need to complain if the jump target is undefined. */
2687 first_stmt = bb->stmts_used;
2688
2689 if (verboze) {
2690 VG_(printf)("bb->next = ");
2691 ppIRExpr(bb->next);
2692 VG_(printf)("\n\n");
2693 }
2694
2695 complainIfUndefined( &mce, bb->next );
2696
2697 if (verboze) {
2698 for (j = first_stmt; j < bb->stmts_used; j++) {
2699 VG_(printf)(" ");
2700 ppIRStmt(bb->stmts[j]);
2701 VG_(printf)("\n");
2702 }
2703 VG_(printf)("\n");
2704 }
2705
2706 return bb;
2707 }
2708 #endif /* UNUSED */
2709
2710 /*--------------------------------------------------------------------*/
2711 /*--- end test_main.c ---*/
2712 /*--------------------------------------------------------------------*/
2713