1
2 /*---------------------------------------------------------------*/
3 /*--- begin test_main.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2015 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <assert.h>
39 #include <string.h>
40
41 #include "libvex_basictypes.h"
42 #include "libvex.h"
43
44 #include "test_main.h"
45
46
47 /*---------------------------------------------------------------*/
48 /*--- Test ---*/
49 /*---------------------------------------------------------------*/
50
51
52 __attribute__ ((noreturn))
53 static
failure_exit(void)54 void failure_exit ( void )
55 {
56 fprintf(stdout, "VEX did failure_exit. Bye.\n");
57 exit(1);
58 }
59
60 static
log_bytes(const HChar * bytes,SizeT nbytes)61 void log_bytes ( const HChar* bytes, SizeT nbytes )
62 {
63 fwrite ( bytes, 1, nbytes, stdout );
64 }
65
66 #define N_LINEBUF 10000
67 static HChar linebuf[N_LINEBUF];
68
69 #define N_ORIGBUF 10000
70 #define N_TRANSBUF 5000
71
72 static UChar origbuf[N_ORIGBUF];
73 static UChar transbuf[N_TRANSBUF];
74
75 static Bool verbose = True;
76
77 /* Forwards */
78 #if 1 /* UNUSED */
79 //static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
80 static
81 IRSB* mc_instrument ( void* closureV,
82 IRSB* bb_in, VexGuestLayout* layout,
83 VexGuestExtents* vge,
84 IRType gWordTy, IRType hWordTy );
85 #endif
86
chase_into_not_ok(void * opaque,Addr dst)87 static Bool chase_into_not_ok ( void* opaque, Addr dst ) {
88 return False;
89 }
needs_self_check(void * closureV,VexRegisterUpdates * pxControl,const VexGuestExtents * vge)90 static UInt needs_self_check ( void *closureV, VexRegisterUpdates *pxControl,
91 const VexGuestExtents *vge ) {
92 return 0;
93 }
94
main(int argc,char ** argv)95 int main ( int argc, char** argv )
96 {
97 FILE* f;
98 Int i;
99 UInt u, sum;
100 Addr32 orig_addr;
101 Int bb_number, n_bbs_done = 0;
102 Int orig_nbytes, trans_used;
103 VexTranslateResult tres;
104 VexControl vcon;
105 VexGuestExtents vge;
106 VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm, vai_mips32, vai_mips64;
107 VexAbiInfo vbi;
108 VexTranslateArgs vta;
109
110 if (argc != 2) {
111 fprintf(stderr, "usage: vex file.orig\n");
112 exit(1);
113 }
114 f = fopen(argv[1], "r");
115 if (!f) {
116 fprintf(stderr, "can't open `%s'\n", argv[1]);
117 exit(1);
118 }
119
120 /* Run with default params. However, we can't allow bb chasing
121 since that causes the front end to get segfaults when it tries
122 to read code outside the initial BB we hand it. So when calling
123 LibVEX_Translate, send in a chase-into predicate that always
124 returns False. */
125 LibVEX_default_VexControl ( &vcon );
126 vcon.iropt_level = 2;
127 vcon.guest_max_insns = 60;
128
129 LibVEX_Init ( &failure_exit, &log_bytes,
130 1, /* debug_paranoia */
131 &vcon );
132
133
134 while (!feof(f)) {
135
136 __attribute__((unused))
137 char* unused1 = fgets(linebuf, N_LINEBUF,f);
138 if (linebuf[0] == 0) continue;
139 if (linebuf[0] != '.') continue;
140
141 if (n_bbs_done == TEST_N_BBS) break;
142 n_bbs_done++;
143
144 /* first line is: . bb-number bb-addr n-bytes */
145 assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
146 & bb_number,
147 & orig_addr, & orig_nbytes ));
148 assert(orig_nbytes >= 1);
149 assert(!feof(f));
150 __attribute__((unused))
151 char* unused2 = fgets(linebuf, N_LINEBUF,f);
152 assert(linebuf[0] == '.');
153
154 /* second line is: . byte byte byte etc */
155 if (verbose)
156 printf("============ Basic Block %d, Done %d, "
157 "Start %x, nbytes %2d ============",
158 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
159
160 /* thumb ITstate analysis needs to examine the 18 bytes
161 preceding the first instruction. So let's leave the first 18
162 zeroed out. */
163 memset(origbuf, 0, sizeof(origbuf));
164
165 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
166 for (i = 0; i < orig_nbytes; i++) {
167 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
168 origbuf[18+ i] = (UChar)u;
169 }
170
171 /* FIXME: put sensible values into the .hwcaps fields */
172 LibVEX_default_VexArchInfo(&vai_x86);
173 vai_x86.hwcaps = VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1
174 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
175 vai_x86.endness = VexEndnessLE;
176
177 LibVEX_default_VexArchInfo(&vai_amd64);
178 vai_amd64.hwcaps = 0;
179 vai_amd64.endness = VexEndnessLE;
180
181 LibVEX_default_VexArchInfo(&vai_ppc32);
182 vai_ppc32.hwcaps = 0;
183 vai_ppc32.ppc_icache_line_szB = 128;
184
185 LibVEX_default_VexArchInfo(&vai_arm);
186 vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;
187
188 LibVEX_default_VexArchInfo(&vai_mips32);
189 vai_mips32.endness = VexEndnessLE;
190 vai_mips32.hwcaps = VEX_PRID_COMP_MIPS;
191
192 LibVEX_default_VexArchInfo(&vai_mips64);
193 vai_mips64.endness = VexEndnessLE;
194
195 LibVEX_default_VexAbiInfo(&vbi);
196 vbi.guest_stack_redzone_size = 128;
197
198 /* ----- Set up args for LibVEX_Translate ----- */
199
200 vta.abiinfo_both = vbi;
201 vta.guest_bytes = &origbuf[18];
202 vta.guest_bytes_addr = orig_addr;
203 vta.callback_opaque = NULL;
204 vta.chase_into_ok = chase_into_not_ok;
205 vta.guest_extents = &vge;
206 vta.host_bytes = transbuf;
207 vta.host_bytes_size = N_TRANSBUF;
208 vta.host_bytes_used = &trans_used;
209
210 #if 0 /* ppc32 -> ppc32 */
211 vta.arch_guest = VexArchPPC32;
212 vta.archinfo_guest = vai_ppc32;
213 vta.arch_host = VexArchPPC32;
214 vta.archinfo_host = vai_ppc32;
215 #endif
216 #if 0 /* amd64 -> amd64 */
217 vta.arch_guest = VexArchAMD64;
218 vta.archinfo_guest = vai_amd64;
219 vta.arch_host = VexArchAMD64;
220 vta.archinfo_host = vai_amd64;
221 #endif
222 #if 0 /* x86 -> x86 */
223 vta.arch_guest = VexArchX86;
224 vta.archinfo_guest = vai_x86;
225 vta.arch_host = VexArchX86;
226 vta.archinfo_host = vai_x86;
227 #endif
228 #if 1 /* x86 -> mips32 */
229 vta.arch_guest = VexArchX86;
230 vta.archinfo_guest = vai_x86;
231 vta.arch_host = VexArchMIPS32;
232 vta.archinfo_host = vai_mips32;
233 #endif
234 #if 0 /* amd64 -> mips64 */
235 vta.arch_guest = VexArchAMD64;
236 vta.archinfo_guest = vai_amd64;
237 vta.arch_host = VexArchMIPS64;
238 vta.archinfo_host = vai_mips64;
239 #endif
240 #if 0 /* arm -> arm */
241 vta.arch_guest = VexArchARM;
242 vta.archinfo_guest = vai_arm;
243 vta.arch_host = VexArchARM;
244 vta.archinfo_host = vai_arm;
245 /* ARM/Thumb only hacks, that are needed to keep the ITstate
246 analyser in the front end happy. */
247 vta.guest_bytes = &origbuf[18 +1];
248 vta.guest_bytes_addr = (Addr) &origbuf[18 +1];
249 #endif
250
251 #if 1 /* no instrumentation */
252 vta.instrument1 = NULL;
253 vta.instrument2 = NULL;
254 #endif
255 #if 0 /* addrcheck */
256 vta.instrument1 = ac_instrument;
257 vta.instrument2 = NULL;
258 #endif
259 #if 0 /* memcheck */
260 vta.instrument1 = mc_instrument;
261 vta.instrument2 = NULL;
262 #endif
263 vta.needs_self_check = needs_self_check;
264 vta.preamble_function = NULL;
265 vta.traceflags = TEST_FLAGS;
266 vta.addProfInc = False;
267 vta.sigill_diag = True;
268
269 vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
270 vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
271 vta.disp_cp_xindir = (void*)0x1234567A;
272 vta.disp_cp_xassisted = (void*)0x1234567B;
273
274 vta.finaltidy = NULL;
275
276 for (i = 0; i < TEST_N_ITERS; i++)
277 tres = LibVEX_Translate ( &vta );
278
279 if (tres.status != VexTransOK)
280 printf("\ntres = %d\n", (Int)tres.status);
281 assert(tres.status == VexTransOK);
282 assert(tres.n_sc_extents == 0);
283 assert(vge.n_used == 1);
284 assert((UInt)(vge.len[0]) == orig_nbytes);
285
286 sum = 0;
287 for (i = 0; i < trans_used; i++)
288 sum += (UInt)transbuf[i];
289 printf ( " %6.2f ... %u\n",
290 (double)trans_used / (double)vge.len[0], sum );
291 }
292
293 fclose(f);
294 printf("\n");
295 LibVEX_ShowAllocStats();
296
297 return 0;
298 }
299
300 //////////////////////////////////////////////////////////////////////
301 //////////////////////////////////////////////////////////////////////
302 //////////////////////////////////////////////////////////////////////
303 //////////////////////////////////////////////////////////////////////
304 //////////////////////////////////////////////////////////////////////
305 //////////////////////////////////////////////////////////////////////
306 //////////////////////////////////////////////////////////////////////
307 //////////////////////////////////////////////////////////////////////
308
309 #if 0 /* UNUSED */
310
311 static
312 __attribute((noreturn))
313 void panic ( HChar* s )
314 {
315 printf("\npanic: %s\n", s);
316 failure_exit();
317 }
318
319 static
320 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
321 {
322 /* Use this rather than eg. -1 because it's a UInt. */
323 #define INVALID_DATA_SIZE 999999
324
325 Int i;
326 Int sz;
327 IRCallee* helper;
328 IRStmt* st;
329 IRExpr* data;
330 IRExpr* addr;
331 Bool needSz;
332
333 /* Set up BB */
334 IRSB* bb = emptyIRSB();
335 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
336 bb->next = dopyIRExpr(bb_in->next);
337 bb->jumpkind = bb_in->jumpkind;
338
339 /* No loads to consider in ->next. */
340 assert(isIRAtom(bb_in->next));
341
342 for (i = 0; i < bb_in->stmts_used; i++) {
343 st = bb_in->stmts[i];
344 if (!st) continue;
345
346 switch (st->tag) {
347
348 case Ist_Tmp:
349 data = st->Ist.Tmp.data;
350 if (data->tag == Iex_LDle) {
351 addr = data->Iex.LDle.addr;
352 sz = sizeofIRType(data->Iex.LDle.ty);
353 needSz = False;
354 switch (sz) {
355 case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
356 (void*)0x12345601); break;
357 case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
358 (void*)0x12345602); break;
359 case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
360 (void*)0x12345603); break;
361 default: helper = mkIRCallee(0, "ac_helperc_LOADN",
362 (void*)0x12345604);
363 needSz = True; break;
364 }
365 if (needSz) {
366 addStmtToIRSB(
367 bb,
368 IRStmt_Dirty(
369 unsafeIRDirty_0_N( helper->regparms,
370 helper->name, helper->addr,
371 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
372 ));
373 } else {
374 addStmtToIRSB(
375 bb,
376 IRStmt_Dirty(
377 unsafeIRDirty_0_N( helper->regparms,
378 helper->name, helper->addr,
379 mkIRExprVec_1(addr) )
380 ));
381 }
382 }
383 break;
384
385 case Ist_STle:
386 data = st->Ist.STle.data;
387 addr = st->Ist.STle.addr;
388 assert(isIRAtom(data));
389 assert(isIRAtom(addr));
390 sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
391 needSz = False;
392 switch (sz) {
393 case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
394 (void*)0x12345605); break;
395 case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
396 (void*)0x12345606); break;
397 case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
398 (void*)0x12345607); break;
399 default: helper = mkIRCallee(0, "ac_helperc_STOREN",
400 (void*)0x12345608);
401 needSz = True; break;
402 }
403 if (needSz) {
404 addStmtToIRSB(
405 bb,
406 IRStmt_Dirty(
407 unsafeIRDirty_0_N( helper->regparms,
408 helper->name, helper->addr,
409 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
410 ));
411 } else {
412 addStmtToIRSB(
413 bb,
414 IRStmt_Dirty(
415 unsafeIRDirty_0_N( helper->regparms,
416 helper->name, helper->addr,
417 mkIRExprVec_1(addr) )
418 ));
419 }
420 break;
421
422 case Ist_Put:
423 assert(isIRAtom(st->Ist.Put.data));
424 break;
425
426 case Ist_PutI:
427 assert(isIRAtom(st->Ist.PutI.ix));
428 assert(isIRAtom(st->Ist.PutI.data));
429 break;
430
431 case Ist_Exit:
432 assert(isIRAtom(st->Ist.Exit.guard));
433 break;
434
435 case Ist_Dirty:
436 /* If the call doesn't interact with memory, we ain't
437 interested. */
438 if (st->Ist.Dirty.details->mFx == Ifx_None)
439 break;
440 goto unhandled;
441
442 default:
443 unhandled:
444 printf("\n");
445 ppIRStmt(st);
446 printf("\n");
447 panic("addrcheck: unhandled IRStmt");
448 }
449
450 addStmtToIRSB( bb, dopyIRStmt(st));
451 }
452
453 return bb;
454 }
455 #endif /* UNUSED */
456
457 //////////////////////////////////////////////////////////////////////
458 //////////////////////////////////////////////////////////////////////
459 //////////////////////////////////////////////////////////////////////
460 //////////////////////////////////////////////////////////////////////
461 //////////////////////////////////////////////////////////////////////
462 //////////////////////////////////////////////////////////////////////
463 //////////////////////////////////////////////////////////////////////
464 //////////////////////////////////////////////////////////////////////
465
466 #if 1 /* UNUSED */
467
468 static
469 __attribute((noreturn))
panic(HChar * s)470 void panic ( HChar* s )
471 {
472 printf("\npanic: %s\n", s);
473 failure_exit();
474 }
475
476 #define tl_assert(xxx) assert(xxx)
477 #define VG_(xxxx) xxxx
478 #define tool_panic(zzz) panic(zzz)
479 #define MC_(zzzz) MC_##zzzz
480 #define TL_(zzzz) SK_##zzzz
481
482
483 static void MC_helperc_complain_undef ( void );
484 static void MC_helperc_LOADV8 ( void );
485 static void MC_helperc_LOADV4 ( void );
486 static void MC_helperc_LOADV2 ( void );
487 static void MC_helperc_LOADV1 ( void );
488 static void MC_helperc_STOREV8( void );
489 static void MC_helperc_STOREV4( void );
490 static void MC_helperc_STOREV2( void );
491 static void MC_helperc_STOREV1( void );
492 static void MC_helperc_value_check0_fail( void );
493 static void MC_helperc_value_check1_fail( void );
494 static void MC_helperc_value_check4_fail( void );
495
MC_helperc_complain_undef(void)496 static void MC_helperc_complain_undef ( void ) { }
MC_helperc_LOADV8(void)497 static void MC_helperc_LOADV8 ( void ) { }
MC_helperc_LOADV4(void)498 static void MC_helperc_LOADV4 ( void ) { }
MC_helperc_LOADV2(void)499 static void MC_helperc_LOADV2 ( void ) { }
MC_helperc_LOADV1(void)500 static void MC_helperc_LOADV1 ( void ) { }
MC_helperc_STOREV8(void)501 static void MC_helperc_STOREV8( void ) { }
MC_helperc_STOREV4(void)502 static void MC_helperc_STOREV4( void ) { }
MC_helperc_STOREV2(void)503 static void MC_helperc_STOREV2( void ) { }
MC_helperc_STOREV1(void)504 static void MC_helperc_STOREV1( void ) { }
MC_helperc_value_check0_fail(void)505 static void MC_helperc_value_check0_fail( void ) { }
MC_helperc_value_check1_fail(void)506 static void MC_helperc_value_check1_fail( void ) { }
MC_helperc_value_check4_fail(void)507 static void MC_helperc_value_check4_fail( void ) { }
508
509
510 /*--------------------------------------------------------------------*/
511 /*--- Instrument IR to perform memory checking operations. ---*/
512 /*--- mc_translate.c ---*/
513 /*--------------------------------------------------------------------*/
514
515 /*
516 This file is part of MemCheck, a heavyweight Valgrind tool for
517 detecting memory errors.
518
519 Copyright (C) 2000-2015 Julian Seward
520 jseward@acm.org
521
522 This program is free software; you can redistribute it and/or
523 modify it under the terms of the GNU General Public License as
524 published by the Free Software Foundation; either version 2 of the
525 License, or (at your option) any later version.
526
527 This program is distributed in the hope that it will be useful, but
528 WITHOUT ANY WARRANTY; without even the implied warranty of
529 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
530 General Public License for more details.
531
532 You should have received a copy of the GNU General Public License
533 along with this program; if not, write to the Free Software
534 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
535 02111-1307, USA.
536
537 The GNU General Public License is contained in the file COPYING.
538 */
539
540 //#include "mc_include.h"
541
542
543 /*------------------------------------------------------------*/
544 /*--- Forward decls ---*/
545 /*------------------------------------------------------------*/
546
547 struct _MCEnv;
548
549 static IRType shadowType ( IRType ty );
550 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
551
552
553 /*------------------------------------------------------------*/
554 /*--- Memcheck running state, and tmp management. ---*/
555 /*------------------------------------------------------------*/
556
557 /* Carries around state during memcheck instrumentation. */
558 typedef
559 struct _MCEnv {
560 /* MODIFIED: the bb being constructed. IRStmts are added. */
561 IRSB* bb;
562
563 /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
564 original temps to their current their current shadow temp.
565 Initially all entries are IRTemp_INVALID. Entries are added
566 lazily since many original temps are not used due to
567 optimisation prior to instrumentation. Note that floating
568 point original tmps are shadowed by integer tmps of the same
569 size, and Bit-typed original tmps are shadowed by the type
570 Ity_I8. See comment below. */
571 IRTemp* tmpMap;
572 Int n_originalTmps; /* for range checking */
573
574 /* READONLY: the guest layout. This indicates which parts of
575 the guest state should be regarded as 'always defined'. */
576 VexGuestLayout* layout;
577 /* READONLY: the host word type. Needed for constructing
578 arguments of type 'HWord' to be passed to helper functions.
579 Ity_I32 or Ity_I64 only. */
580 IRType hWordTy;
581 }
582 MCEnv;
583
584 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
585 demand), as they are encountered. This is for two reasons.
586
587 (1) (less important reason): Many original tmps are unused due to
588 initial IR optimisation, and we do not want to spaces in tables
589 tracking them.
590
591 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
592 table indexed [0 .. n_types-1], which gives the current shadow for
593 each original tmp, or INVALID_IRTEMP if none is so far assigned.
594 It is necessary to support making multiple assignments to a shadow
595 -- specifically, after testing a shadow for definedness, it needs
596 to be made defined. But IR's SSA property disallows this.
597
598 (2) (more important reason): Therefore, when a shadow needs to get
599 a new value, a new temporary is created, the value is assigned to
600 that, and the tmpMap is updated to reflect the new binding.
601
602 A corollary is that if the tmpMap maps a given tmp to
603 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
604 there's a read-before-write error in the original tmps. The IR
605 sanity checker should catch all such anomalies, however.
606 */
607
608 /* Find the tmp currently shadowing the given original tmp. If none
609 so far exists, allocate one. */
findShadowTmp(MCEnv * mce,IRTemp orig)610 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
611 {
612 tl_assert(orig < mce->n_originalTmps);
613 if (mce->tmpMap[orig] == IRTemp_INVALID) {
614 mce->tmpMap[orig]
615 = newIRTemp(mce->bb->tyenv,
616 shadowType(mce->bb->tyenv->types[orig]));
617 }
618 return mce->tmpMap[orig];
619 }
620
621 /* Allocate a new shadow for the given original tmp. This means any
622 previous shadow is abandoned. This is needed because it is
623 necessary to give a new value to a shadow once it has been tested
624 for undefinedness, but unfortunately IR's SSA property disallows
625 this. Instead we must abandon the old shadow, allocate a new one
626 and use that instead. */
newShadowTmp(MCEnv * mce,IRTemp orig)627 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
628 {
629 tl_assert(orig < mce->n_originalTmps);
630 mce->tmpMap[orig]
631 = newIRTemp(mce->bb->tyenv,
632 shadowType(mce->bb->tyenv->types[orig]));
633 }
634
635
636 /*------------------------------------------------------------*/
637 /*--- IRAtoms -- a subset of IRExprs ---*/
638 /*------------------------------------------------------------*/
639
640 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
641 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
642 input, most of this code deals in atoms. Usefully, a value atom
643 always has a V-value which is also an atom: constants are shadowed
644 by constants, and temps are shadowed by the corresponding shadow
645 temporary. */
646
647 typedef IRExpr IRAtom;
648
649 /* (used for sanity checks only): is this an atom which looks
650 like it's from original code? */
isOriginalAtom(MCEnv * mce,IRAtom * a1)651 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
652 {
653 if (a1->tag == Iex_Const)
654 return True;
655 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
656 return True;
657 return False;
658 }
659
660 /* (used for sanity checks only): is this an atom which looks
661 like it's from shadow code? */
isShadowAtom(MCEnv * mce,IRAtom * a1)662 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
663 {
664 if (a1->tag == Iex_Const)
665 return True;
666 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
667 return True;
668 return False;
669 }
670
671 /* (used for sanity checks only): check that both args are atoms and
672 are identically-kinded. */
sameKindedAtoms(IRAtom * a1,IRAtom * a2)673 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
674 {
675 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
676 return True;
677 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
678 return True;
679 return False;
680 }
681
682
683 /*------------------------------------------------------------*/
684 /*--- Type management ---*/
685 /*------------------------------------------------------------*/
686
687 /* Shadow state is always accessed using integer types. This returns
688 an integer type with the same size (as per sizeofIRType) as the
689 given type. The only valid shadow types are Bit, I8, I16, I32,
690 I64, V128. */
691
shadowType(IRType ty)692 static IRType shadowType ( IRType ty )
693 {
694 switch (ty) {
695 case Ity_I1:
696 case Ity_I8:
697 case Ity_I16:
698 case Ity_I32:
699 case Ity_I64: return ty;
700 case Ity_F32: return Ity_I32;
701 case Ity_F64: return Ity_I64;
702 case Ity_V128: return Ity_V128;
703 default: ppIRType(ty);
704 VG_(tool_panic)("memcheck:shadowType");
705 }
706 }
707
708 /* Produce a 'defined' value of the given shadow type. Should only be
709 supplied shadow types (Bit/I8/I16/I32/UI64). */
definedOfType(IRType ty)710 static IRExpr* definedOfType ( IRType ty ) {
711 switch (ty) {
712 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
713 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
714 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
715 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
716 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
717 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
718 default: VG_(tool_panic)("memcheck:definedOfType");
719 }
720 }
721
722
723 /*------------------------------------------------------------*/
724 /*--- Constructing IR fragments ---*/
725 /*------------------------------------------------------------*/
726
727 /* assign value to tmp */
728 #define assign(_bb,_tmp,_expr) \
729 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
730
731 /* add stmt to a bb */
732 #define stmt(_bb,_stmt) \
733 addStmtToIRSB((_bb), (_stmt))
734
735 /* build various kinds of expressions */
736 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
737 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
738 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
739 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
740 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
741 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
742 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
743 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
744
745 /* bind the given expression to a new temporary, and return the
746 temporary. This effectively converts an arbitrary expression into
747 an atom. */
assignNew(MCEnv * mce,IRType ty,IRExpr * e)748 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
749 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
750 assign(mce->bb, t, e);
751 return mkexpr(t);
752 }
753
754
755 /*------------------------------------------------------------*/
756 /*--- Constructing definedness primitive ops ---*/
757 /*------------------------------------------------------------*/
758
759 /* --------- Defined-if-either-defined --------- */
760
mkDifD8(MCEnv * mce,IRAtom * a1,IRAtom * a2)761 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
762 tl_assert(isShadowAtom(mce,a1));
763 tl_assert(isShadowAtom(mce,a2));
764 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
765 }
766
mkDifD16(MCEnv * mce,IRAtom * a1,IRAtom * a2)767 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
768 tl_assert(isShadowAtom(mce,a1));
769 tl_assert(isShadowAtom(mce,a2));
770 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
771 }
772
mkDifD32(MCEnv * mce,IRAtom * a1,IRAtom * a2)773 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
774 tl_assert(isShadowAtom(mce,a1));
775 tl_assert(isShadowAtom(mce,a2));
776 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
777 }
778
mkDifD64(MCEnv * mce,IRAtom * a1,IRAtom * a2)779 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
780 tl_assert(isShadowAtom(mce,a1));
781 tl_assert(isShadowAtom(mce,a2));
782 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
783 }
784
mkDifDV128(MCEnv * mce,IRAtom * a1,IRAtom * a2)785 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
786 tl_assert(isShadowAtom(mce,a1));
787 tl_assert(isShadowAtom(mce,a2));
788 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
789 }
790
791 /* --------- Undefined-if-either-undefined --------- */
792
mkUifU8(MCEnv * mce,IRAtom * a1,IRAtom * a2)793 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
794 tl_assert(isShadowAtom(mce,a1));
795 tl_assert(isShadowAtom(mce,a2));
796 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
797 }
798
mkUifU16(MCEnv * mce,IRAtom * a1,IRAtom * a2)799 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
800 tl_assert(isShadowAtom(mce,a1));
801 tl_assert(isShadowAtom(mce,a2));
802 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
803 }
804
mkUifU32(MCEnv * mce,IRAtom * a1,IRAtom * a2)805 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
806 tl_assert(isShadowAtom(mce,a1));
807 tl_assert(isShadowAtom(mce,a2));
808 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
809 }
810
mkUifU64(MCEnv * mce,IRAtom * a1,IRAtom * a2)811 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
812 tl_assert(isShadowAtom(mce,a1));
813 tl_assert(isShadowAtom(mce,a2));
814 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
815 }
816
mkUifUV128(MCEnv * mce,IRAtom * a1,IRAtom * a2)817 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
818 tl_assert(isShadowAtom(mce,a1));
819 tl_assert(isShadowAtom(mce,a2));
820 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
821 }
822
mkUifU(MCEnv * mce,IRType vty,IRAtom * a1,IRAtom * a2)823 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
824 switch (vty) {
825 case Ity_I8: return mkUifU8(mce, a1, a2);
826 case Ity_I16: return mkUifU16(mce, a1, a2);
827 case Ity_I32: return mkUifU32(mce, a1, a2);
828 case Ity_I64: return mkUifU64(mce, a1, a2);
829 case Ity_V128: return mkUifUV128(mce, a1, a2);
830 default:
831 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
832 VG_(tool_panic)("memcheck:mkUifU");
833 }
834 }
835
836 /* --------- The Left-family of operations. --------- */
837
mkLeft8(MCEnv * mce,IRAtom * a1)838 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
839 tl_assert(isShadowAtom(mce,a1));
840 /* It's safe to duplicate a1 since it's only an atom */
841 return assignNew(mce, Ity_I8,
842 binop(Iop_Or8, a1,
843 assignNew(mce, Ity_I8,
844 /* unop(Iop_Neg8, a1)))); */
845 binop(Iop_Sub8, mkU8(0), a1) )));
846 }
847
mkLeft16(MCEnv * mce,IRAtom * a1)848 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
849 tl_assert(isShadowAtom(mce,a1));
850 /* It's safe to duplicate a1 since it's only an atom */
851 return assignNew(mce, Ity_I16,
852 binop(Iop_Or16, a1,
853 assignNew(mce, Ity_I16,
854 /* unop(Iop_Neg16, a1)))); */
855 binop(Iop_Sub16, mkU16(0), a1) )));
856 }
857
mkLeft32(MCEnv * mce,IRAtom * a1)858 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
859 tl_assert(isShadowAtom(mce,a1));
860 /* It's safe to duplicate a1 since it's only an atom */
861 return assignNew(mce, Ity_I32,
862 binop(Iop_Or32, a1,
863 assignNew(mce, Ity_I32,
864 /* unop(Iop_Neg32, a1)))); */
865 binop(Iop_Sub32, mkU32(0), a1) )));
866 }
867
868 /* --------- 'Improvement' functions for AND/OR. --------- */
869
870 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
871 defined (0); all other -> undefined (1).
872 */
mkImproveAND8(MCEnv * mce,IRAtom * data,IRAtom * vbits)873 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
874 {
875 tl_assert(isOriginalAtom(mce, data));
876 tl_assert(isShadowAtom(mce, vbits));
877 tl_assert(sameKindedAtoms(data, vbits));
878 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
879 }
880
mkImproveAND16(MCEnv * mce,IRAtom * data,IRAtom * vbits)881 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
882 {
883 tl_assert(isOriginalAtom(mce, data));
884 tl_assert(isShadowAtom(mce, vbits));
885 tl_assert(sameKindedAtoms(data, vbits));
886 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
887 }
888
mkImproveAND32(MCEnv * mce,IRAtom * data,IRAtom * vbits)889 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
890 {
891 tl_assert(isOriginalAtom(mce, data));
892 tl_assert(isShadowAtom(mce, vbits));
893 tl_assert(sameKindedAtoms(data, vbits));
894 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
895 }
896
mkImproveAND64(MCEnv * mce,IRAtom * data,IRAtom * vbits)897 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
898 {
899 tl_assert(isOriginalAtom(mce, data));
900 tl_assert(isShadowAtom(mce, vbits));
901 tl_assert(sameKindedAtoms(data, vbits));
902 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
903 }
904
mkImproveANDV128(MCEnv * mce,IRAtom * data,IRAtom * vbits)905 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
906 {
907 tl_assert(isOriginalAtom(mce, data));
908 tl_assert(isShadowAtom(mce, vbits));
909 tl_assert(sameKindedAtoms(data, vbits));
910 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
911 }
912
913 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
914 defined (0); all other -> undefined (1).
915 */
mkImproveOR8(MCEnv * mce,IRAtom * data,IRAtom * vbits)916 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
917 {
918 tl_assert(isOriginalAtom(mce, data));
919 tl_assert(isShadowAtom(mce, vbits));
920 tl_assert(sameKindedAtoms(data, vbits));
921 return assignNew(
922 mce, Ity_I8,
923 binop(Iop_Or8,
924 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
925 vbits) );
926 }
927
mkImproveOR16(MCEnv * mce,IRAtom * data,IRAtom * vbits)928 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
929 {
930 tl_assert(isOriginalAtom(mce, data));
931 tl_assert(isShadowAtom(mce, vbits));
932 tl_assert(sameKindedAtoms(data, vbits));
933 return assignNew(
934 mce, Ity_I16,
935 binop(Iop_Or16,
936 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
937 vbits) );
938 }
939
mkImproveOR32(MCEnv * mce,IRAtom * data,IRAtom * vbits)940 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
941 {
942 tl_assert(isOriginalAtom(mce, data));
943 tl_assert(isShadowAtom(mce, vbits));
944 tl_assert(sameKindedAtoms(data, vbits));
945 return assignNew(
946 mce, Ity_I32,
947 binop(Iop_Or32,
948 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
949 vbits) );
950 }
951
mkImproveOR64(MCEnv * mce,IRAtom * data,IRAtom * vbits)952 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
953 {
954 tl_assert(isOriginalAtom(mce, data));
955 tl_assert(isShadowAtom(mce, vbits));
956 tl_assert(sameKindedAtoms(data, vbits));
957 return assignNew(
958 mce, Ity_I64,
959 binop(Iop_Or64,
960 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
961 vbits) );
962 }
963
mkImproveORV128(MCEnv * mce,IRAtom * data,IRAtom * vbits)964 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
965 {
966 tl_assert(isOriginalAtom(mce, data));
967 tl_assert(isShadowAtom(mce, vbits));
968 tl_assert(sameKindedAtoms(data, vbits));
969 return assignNew(
970 mce, Ity_V128,
971 binop(Iop_OrV128,
972 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
973 vbits) );
974 }
975
976 /* --------- Pessimising casts. --------- */
977
mkPCastTo(MCEnv * mce,IRType dst_ty,IRAtom * vbits)978 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
979 {
980 IRType ty;
981 IRAtom* tmp1;
982 /* Note, dst_ty is a shadow type, not an original type. */
983 /* First of all, collapse vbits down to a single bit. */
984 tl_assert(isShadowAtom(mce,vbits));
985 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
986 tmp1 = NULL;
987 switch (ty) {
988 case Ity_I1:
989 tmp1 = vbits;
990 break;
991 case Ity_I8:
992 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
993 break;
994 case Ity_I16:
995 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
996 break;
997 case Ity_I32:
998 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
999 break;
1000 case Ity_I64:
1001 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
1002 break;
1003 default:
1004 VG_(tool_panic)("mkPCastTo(1)");
1005 }
1006 tl_assert(tmp1);
1007 /* Now widen up to the dst type. */
1008 switch (dst_ty) {
1009 case Ity_I1:
1010 return tmp1;
1011 case Ity_I8:
1012 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
1013 case Ity_I16:
1014 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
1015 case Ity_I32:
1016 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
1017 case Ity_I64:
1018 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1019 case Ity_V128:
1020 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1021 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
1022 return tmp1;
1023 default:
1024 ppIRType(dst_ty);
1025 VG_(tool_panic)("mkPCastTo(2)");
1026 }
1027 }
1028
1029
1030 /*------------------------------------------------------------*/
1031 /*--- Emit a test and complaint if something is undefined. ---*/
1032 /*------------------------------------------------------------*/
1033
1034 /* Set the annotations on a dirty helper to indicate that the stack
1035 pointer and instruction pointers might be read. This is the
1036 behaviour of all 'emit-a-complaint' style functions we might
1037 call. */
1038
setHelperAnns(MCEnv * mce,IRDirty * di)1039 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1040 di->nFxState = 2;
1041 di->fxState[0].fx = Ifx_Read;
1042 di->fxState[0].offset = mce->layout->offset_SP;
1043 di->fxState[0].size = mce->layout->sizeof_SP;
1044 di->fxState[1].fx = Ifx_Read;
1045 di->fxState[1].offset = mce->layout->offset_IP;
1046 di->fxState[1].size = mce->layout->sizeof_IP;
1047 }
1048
1049
1050 /* Check the supplied **original** atom for undefinedness, and emit a
1051 complaint if so. Once that happens, mark it as defined. This is
1052 possible because the atom is either a tmp or literal. If it's a
1053 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1054 be defined. In fact as mentioned above, we will have to allocate a
1055 new tmp to carry the new 'defined' shadow value, and update the
1056 original->tmp mapping accordingly; we cannot simply assign a new
1057 value to an existing shadow tmp as this breaks SSAness -- resulting
1058 in the post-instrumentation sanity checker spluttering in disapproval.
1059 */
complainIfUndefined(MCEnv * mce,IRAtom * atom)1060 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1061 {
1062 IRAtom* vatom;
1063 IRType ty;
1064 Int sz;
1065 IRDirty* di;
1066 IRAtom* cond;
1067
1068 /* Since the original expression is atomic, there's no duplicated
1069 work generated by making multiple V-expressions for it. So we
1070 don't really care about the possibility that someone else may
1071 also create a V-interpretion for it. */
1072 tl_assert(isOriginalAtom(mce, atom));
1073 vatom = expr2vbits( mce, atom );
1074 tl_assert(isShadowAtom(mce, vatom));
1075 tl_assert(sameKindedAtoms(atom, vatom));
1076
1077 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1078
1079 /* sz is only used for constructing the error message */
1080 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1081
1082 cond = mkPCastTo( mce, Ity_I1, vatom );
1083 /* cond will be 0 if all defined, and 1 if any not defined. */
1084
1085 switch (sz) {
1086 case 0:
1087 di = unsafeIRDirty_0_N( 0/*regparms*/,
1088 "MC_(helperc_value_check0_fail)",
1089 &MC_(helperc_value_check0_fail),
1090 mkIRExprVec_0()
1091 );
1092 break;
1093 case 1:
1094 di = unsafeIRDirty_0_N( 0/*regparms*/,
1095 "MC_(helperc_value_check1_fail)",
1096 &MC_(helperc_value_check1_fail),
1097 mkIRExprVec_0()
1098 );
1099 break;
1100 case 4:
1101 di = unsafeIRDirty_0_N( 0/*regparms*/,
1102 "MC_(helperc_value_check4_fail)",
1103 &MC_(helperc_value_check4_fail),
1104 mkIRExprVec_0()
1105 );
1106 break;
1107 default:
1108 di = unsafeIRDirty_0_N( 1/*regparms*/,
1109 "MC_(helperc_complain_undef)",
1110 &MC_(helperc_complain_undef),
1111 mkIRExprVec_1( mkIRExpr_HWord( sz ))
1112 );
1113 break;
1114 }
1115 di->guard = cond;
1116 setHelperAnns( mce, di );
1117 stmt( mce->bb, IRStmt_Dirty(di));
1118
1119 /* Set the shadow tmp to be defined. First, update the
1120 orig->shadow tmp mapping to reflect the fact that this shadow is
1121 getting a new value. */
1122 tl_assert(isIRAtom(vatom));
1123 /* sameKindedAtoms ... */
1124 if (vatom->tag == Iex_RdTmp) {
1125 tl_assert(atom->tag == Iex_RdTmp);
1126 newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1127 assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1128 definedOfType(ty));
1129 }
1130 }
1131
1132
1133 /*------------------------------------------------------------*/
1134 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1135 /*------------------------------------------------------------*/
1136
1137 /* Examine the always-defined sections declared in layout to see if
1138 the (offset,size) section is within one. Note, is is an error to
1139 partially fall into such a region: (offset,size) should either be
1140 completely in such a region or completely not-in such a region.
1141 */
isAlwaysDefd(MCEnv * mce,Int offset,Int size)1142 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1143 {
1144 Int minoffD, maxoffD, i;
1145 Int minoff = offset;
1146 Int maxoff = minoff + size - 1;
1147 tl_assert((minoff & ~0xFFFF) == 0);
1148 tl_assert((maxoff & ~0xFFFF) == 0);
1149
1150 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1151 minoffD = mce->layout->alwaysDefd[i].offset;
1152 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1153 tl_assert((minoffD & ~0xFFFF) == 0);
1154 tl_assert((maxoffD & ~0xFFFF) == 0);
1155
1156 if (maxoff < minoffD || maxoffD < minoff)
1157 continue; /* no overlap */
1158 if (minoff >= minoffD && maxoff <= maxoffD)
1159 return True; /* completely contained in an always-defd section */
1160
1161 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1162 }
1163 return False; /* could not find any containing section */
1164 }
1165
1166
1167 /* Generate into bb suitable actions to shadow this Put. If the state
1168 slice is marked 'always defined', do nothing. Otherwise, write the
1169 supplied V bits to the shadow state. We can pass in either an
1170 original atom or a V-atom, but not both. In the former case the
1171 relevant V-bits are then generated from the original.
1172 */
1173 static
do_shadow_PUT(MCEnv * mce,Int offset,IRAtom * atom,IRAtom * vatom)1174 void do_shadow_PUT ( MCEnv* mce, Int offset,
1175 IRAtom* atom, IRAtom* vatom )
1176 {
1177 IRType ty;
1178 if (atom) {
1179 tl_assert(!vatom);
1180 tl_assert(isOriginalAtom(mce, atom));
1181 vatom = expr2vbits( mce, atom );
1182 } else {
1183 tl_assert(vatom);
1184 tl_assert(isShadowAtom(mce, vatom));
1185 }
1186
1187 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1188 tl_assert(ty != Ity_I1);
1189 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1190 /* later: no ... */
1191 /* emit code to emit a complaint if any of the vbits are 1. */
1192 /* complainIfUndefined(mce, atom); */
1193 } else {
1194 /* Do a plain shadow Put. */
1195 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1196 }
1197 }
1198
1199
1200 /* Return an expression which contains the V bits corresponding to the
1201 given GETI (passed in in pieces).
1202 */
1203 static
do_shadow_PUTI(MCEnv * mce,IRRegArray * descr,IRAtom * ix,Int bias,IRAtom * atom)1204 void do_shadow_PUTI ( MCEnv* mce,
1205 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1206 {
1207 IRAtom* vatom;
1208 IRType ty, tyS;
1209 Int arrSize;;
1210
1211 tl_assert(isOriginalAtom(mce,atom));
1212 vatom = expr2vbits( mce, atom );
1213 tl_assert(sameKindedAtoms(atom, vatom));
1214 ty = descr->elemTy;
1215 tyS = shadowType(ty);
1216 arrSize = descr->nElems * sizeofIRType(ty);
1217 tl_assert(ty != Ity_I1);
1218 tl_assert(isOriginalAtom(mce,ix));
1219 complainIfUndefined(mce,ix);
1220 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1221 /* later: no ... */
1222 /* emit code to emit a complaint if any of the vbits are 1. */
1223 /* complainIfUndefined(mce, atom); */
1224 } else {
1225 /* Do a cloned version of the Put that refers to the shadow
1226 area. */
1227 IRRegArray* new_descr
1228 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1229 tyS, descr->nElems);
1230 stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
1231 }
1232 }
1233
1234
1235 /* Return an expression which contains the V bits corresponding to the
1236 given GET (passed in in pieces).
1237 */
1238 static
shadow_GET(MCEnv * mce,Int offset,IRType ty)1239 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1240 {
1241 IRType tyS = shadowType(ty);
1242 tl_assert(ty != Ity_I1);
1243 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1244 /* Always defined, return all zeroes of the relevant type */
1245 return definedOfType(tyS);
1246 } else {
1247 /* return a cloned version of the Get that refers to the shadow
1248 area. */
1249 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1250 }
1251 }
1252
1253
1254 /* Return an expression which contains the V bits corresponding to the
1255 given GETI (passed in in pieces).
1256 */
1257 static
shadow_GETI(MCEnv * mce,IRRegArray * descr,IRAtom * ix,Int bias)1258 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1259 {
1260 IRType ty = descr->elemTy;
1261 IRType tyS = shadowType(ty);
1262 Int arrSize = descr->nElems * sizeofIRType(ty);
1263 tl_assert(ty != Ity_I1);
1264 tl_assert(isOriginalAtom(mce,ix));
1265 complainIfUndefined(mce,ix);
1266 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1267 /* Always defined, return all zeroes of the relevant type */
1268 return definedOfType(tyS);
1269 } else {
1270 /* return a cloned version of the Get that refers to the shadow
1271 area. */
1272 IRRegArray* new_descr
1273 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1274 tyS, descr->nElems);
1275 return IRExpr_GetI( new_descr, ix, bias );
1276 }
1277 }
1278
1279
1280 /*------------------------------------------------------------*/
1281 /*--- Generating approximations for unknown operations, ---*/
1282 /*--- using lazy-propagate semantics ---*/
1283 /*------------------------------------------------------------*/
1284
1285 /* Lazy propagation of undefinedness from two values, resulting in the
1286 specified shadow type.
1287 */
1288 static
mkLazy2(MCEnv * mce,IRType finalVty,IRAtom * va1,IRAtom * va2)1289 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1290 {
1291 /* force everything via 32-bit intermediaries. */
1292 IRAtom* at;
1293 tl_assert(isShadowAtom(mce,va1));
1294 tl_assert(isShadowAtom(mce,va2));
1295 at = mkPCastTo(mce, Ity_I32, va1);
1296 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1297 at = mkPCastTo(mce, finalVty, at);
1298 return at;
1299 }
1300
1301
1302 /* Do the lazy propagation game from a null-terminated vector of
1303 atoms. This is presumably the arguments to a helper call, so the
1304 IRCallee info is also supplied in order that we can know which
1305 arguments should be ignored (via the .mcx_mask field).
1306 */
1307 static
mkLazyN(MCEnv * mce,IRAtom ** exprvec,IRType finalVtype,IRCallee * cee)1308 IRAtom* mkLazyN ( MCEnv* mce,
1309 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1310 {
1311 Int i;
1312 IRAtom* here;
1313 IRAtom* curr = definedOfType(Ity_I32);
1314 for (i = 0; exprvec[i]; i++) {
1315 tl_assert(i < 32);
1316 tl_assert(isOriginalAtom(mce, exprvec[i]));
1317 /* Only take notice of this arg if the callee's mc-exclusion
1318 mask does not say it is to be excluded. */
1319 if (cee->mcx_mask & (1<<i)) {
1320 /* the arg is to be excluded from definedness checking. Do
1321 nothing. */
1322 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1323 } else {
1324 /* calculate the arg's definedness, and pessimistically merge
1325 it in. */
1326 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1327 curr = mkUifU32(mce, here, curr);
1328 }
1329 }
1330 return mkPCastTo(mce, finalVtype, curr );
1331 }
1332
1333
1334 /*------------------------------------------------------------*/
1335 /*--- Generating expensive sequences for exact carry-chain ---*/
1336 /*--- propagation in add/sub and related operations. ---*/
1337 /*------------------------------------------------------------*/
1338
1339 static
1340 __attribute__((unused))
expensiveAdd32(MCEnv * mce,IRAtom * qaa,IRAtom * qbb,IRAtom * aa,IRAtom * bb)1341 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1342 IRAtom* aa, IRAtom* bb )
1343 {
1344 IRAtom *a_min, *b_min, *a_max, *b_max;
1345 IRType ty;
1346 IROp opAND, opOR, opXOR, opNOT, opADD;
1347
1348 tl_assert(isShadowAtom(mce,qaa));
1349 tl_assert(isShadowAtom(mce,qbb));
1350 tl_assert(isOriginalAtom(mce,aa));
1351 tl_assert(isOriginalAtom(mce,bb));
1352 tl_assert(sameKindedAtoms(qaa,aa));
1353 tl_assert(sameKindedAtoms(qbb,bb));
1354
1355 ty = Ity_I32;
1356 opAND = Iop_And32;
1357 opOR = Iop_Or32;
1358 opXOR = Iop_Xor32;
1359 opNOT = Iop_Not32;
1360 opADD = Iop_Add32;
1361
1362 // a_min = aa & ~qaa
1363 a_min = assignNew(mce,ty,
1364 binop(opAND, aa,
1365 assignNew(mce,ty, unop(opNOT, qaa))));
1366
1367 // b_min = bb & ~qbb
1368 b_min = assignNew(mce,ty,
1369 binop(opAND, bb,
1370 assignNew(mce,ty, unop(opNOT, qbb))));
1371
1372 // a_max = aa | qaa
1373 a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1374
1375 // b_max = bb | qbb
1376 b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1377
1378 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1379 return
1380 assignNew(mce,ty,
1381 binop( opOR,
1382 assignNew(mce,ty, binop(opOR, qaa, qbb)),
1383 assignNew(mce,ty,
1384 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1385 assignNew(mce,ty, binop(opADD, a_max, b_max))
1386 )
1387 )
1388 )
1389 );
1390 }
1391
1392
1393 /*------------------------------------------------------------*/
1394 /*--- Helpers for dealing with vector primops. ---*/
1395 /*------------------------------------------------------------*/
1396
1397 /* Vector pessimisation -- pessimise within each lane individually. */
1398
mkPCast8x16(MCEnv * mce,IRAtom * at)1399 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1400 {
1401 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1402 }
1403
mkPCast16x8(MCEnv * mce,IRAtom * at)1404 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1405 {
1406 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1407 }
1408
mkPCast32x4(MCEnv * mce,IRAtom * at)1409 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1410 {
1411 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1412 }
1413
mkPCast64x2(MCEnv * mce,IRAtom * at)1414 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1415 {
1416 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1417 }
1418
1419
1420 /* Here's a simple scheme capable of handling ops derived from SSE1
1421 code and while only generating ops that can be efficiently
1422 implemented in SSE1. */
1423
1424 /* All-lanes versions are straightforward:
1425
1426 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1427
1428 unary32Fx4(x,y) ==> PCast32x4(x#)
1429
1430 Lowest-lane-only versions are more complex:
1431
1432 binary32F0x4(x,y) ==> SetV128lo32(
1433 x#,
1434 PCast32(V128to32(UifUV128(x#,y#)))
1435 )
1436
1437 This is perhaps not so obvious. In particular, it's faster to
1438 do a V128-bit UifU and then take the bottom 32 bits than the more
1439 obvious scheme of taking the bottom 32 bits of each operand
1440 and doing a 32-bit UifU. Basically since UifU is fast and
1441 chopping lanes off vector values is slow.
1442
1443 Finally:
1444
1445 unary32F0x4(x) ==> SetV128lo32(
1446 x#,
1447 PCast32(V128to32(x#))
1448 )
1449
1450 Where:
1451
1452 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1453 PCast32x4(v#) = CmpNEZ32x4(v#)
1454 */
1455
1456 static
binary32Fx4(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)1457 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1458 {
1459 IRAtom* at;
1460 tl_assert(isShadowAtom(mce, vatomX));
1461 tl_assert(isShadowAtom(mce, vatomY));
1462 at = mkUifUV128(mce, vatomX, vatomY);
1463 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1464 return at;
1465 }
1466
1467 static
unary32Fx4(MCEnv * mce,IRAtom * vatomX)1468 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1469 {
1470 IRAtom* at;
1471 tl_assert(isShadowAtom(mce, vatomX));
1472 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1473 return at;
1474 }
1475
1476 static
binary32F0x4(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)1477 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1478 {
1479 IRAtom* at;
1480 tl_assert(isShadowAtom(mce, vatomX));
1481 tl_assert(isShadowAtom(mce, vatomY));
1482 at = mkUifUV128(mce, vatomX, vatomY);
1483 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1484 at = mkPCastTo(mce, Ity_I32, at);
1485 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1486 return at;
1487 }
1488
1489 static
unary32F0x4(MCEnv * mce,IRAtom * vatomX)1490 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1491 {
1492 IRAtom* at;
1493 tl_assert(isShadowAtom(mce, vatomX));
1494 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1495 at = mkPCastTo(mce, Ity_I32, at);
1496 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1497 return at;
1498 }
1499
1500 /* --- ... and ... 64Fx2 versions of the same ... --- */
1501
1502 static
binary64Fx2(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)1503 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1504 {
1505 IRAtom* at;
1506 tl_assert(isShadowAtom(mce, vatomX));
1507 tl_assert(isShadowAtom(mce, vatomY));
1508 at = mkUifUV128(mce, vatomX, vatomY);
1509 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1510 return at;
1511 }
1512
1513 static
unary64Fx2(MCEnv * mce,IRAtom * vatomX)1514 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1515 {
1516 IRAtom* at;
1517 tl_assert(isShadowAtom(mce, vatomX));
1518 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1519 return at;
1520 }
1521
1522 static
binary64F0x2(MCEnv * mce,IRAtom * vatomX,IRAtom * vatomY)1523 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1524 {
1525 IRAtom* at;
1526 tl_assert(isShadowAtom(mce, vatomX));
1527 tl_assert(isShadowAtom(mce, vatomY));
1528 at = mkUifUV128(mce, vatomX, vatomY);
1529 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1530 at = mkPCastTo(mce, Ity_I64, at);
1531 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1532 return at;
1533 }
1534
1535 static
unary64F0x2(MCEnv * mce,IRAtom * vatomX)1536 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1537 {
1538 IRAtom* at;
1539 tl_assert(isShadowAtom(mce, vatomX));
1540 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1541 at = mkPCastTo(mce, Ity_I64, at);
1542 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1543 return at;
1544 }
1545
1546 /* --- --- Vector saturated narrowing --- --- */
1547
1548 /* This is quite subtle. What to do is simple:
1549
1550 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1551
1552 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1553
1554 Why this is right is not so simple. Consider a lane in the args,
1555 vatom1 or 2, doesn't matter.
1556
1557 After the PCast, that lane is all 0s (defined) or all
1558 1s(undefined).
1559
1560 Both signed and unsigned saturating narrowing of all 0s produces
1561 all 0s, which is what we want.
1562
1563 The all-1s case is more complex. Unsigned narrowing interprets an
1564 all-1s input as the largest unsigned integer, and so produces all
1565 1s as a result since that is the largest unsigned value at the
1566 smaller width.
1567
1568 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1569 to -1, so we still wind up with all 1s at the smaller width.
1570
1571 So: In short, pessimise the args, then apply the original narrowing
1572 op.
1573 */
1574 static
vectorNarrowV128(MCEnv * mce,IROp narrow_op,IRAtom * vatom1,IRAtom * vatom2)1575 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1576 IRAtom* vatom1, IRAtom* vatom2)
1577 {
1578 IRAtom *at1, *at2, *at3;
1579 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1580 switch (narrow_op) {
1581 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1582 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
1583 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
1584 default: VG_(tool_panic)("vectorNarrowV128");
1585 }
1586 tl_assert(isShadowAtom(mce,vatom1));
1587 tl_assert(isShadowAtom(mce,vatom2));
1588 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1589 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1590 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1591 return at3;
1592 }
1593
1594
1595 /* --- --- Vector integer arithmetic --- --- */
1596
1597 /* Simple ... UifU the args and per-lane pessimise the results. */
1598 static
binary8Ix16(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)1599 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1600 {
1601 IRAtom* at;
1602 at = mkUifUV128(mce, vatom1, vatom2);
1603 at = mkPCast8x16(mce, at);
1604 return at;
1605 }
1606
1607 static
binary16Ix8(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)1608 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1609 {
1610 IRAtom* at;
1611 at = mkUifUV128(mce, vatom1, vatom2);
1612 at = mkPCast16x8(mce, at);
1613 return at;
1614 }
1615
1616 static
binary32Ix4(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)1617 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1618 {
1619 IRAtom* at;
1620 at = mkUifUV128(mce, vatom1, vatom2);
1621 at = mkPCast32x4(mce, at);
1622 return at;
1623 }
1624
1625 static
binary64Ix2(MCEnv * mce,IRAtom * vatom1,IRAtom * vatom2)1626 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1627 {
1628 IRAtom* at;
1629 at = mkUifUV128(mce, vatom1, vatom2);
1630 at = mkPCast64x2(mce, at);
1631 return at;
1632 }
1633
1634
1635 /*------------------------------------------------------------*/
1636 /*--- Generate shadow values from all kinds of IRExprs. ---*/
1637 /*------------------------------------------------------------*/
1638
1639 static
expr2vbits_Binop(MCEnv * mce,IROp op,IRAtom * atom1,IRAtom * atom2)1640 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1641 IROp op,
1642 IRAtom* atom1, IRAtom* atom2 )
1643 {
1644 IRType and_or_ty;
1645 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1646 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1647 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1648
1649 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1650 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1651
1652 tl_assert(isOriginalAtom(mce,atom1));
1653 tl_assert(isOriginalAtom(mce,atom2));
1654 tl_assert(isShadowAtom(mce,vatom1));
1655 tl_assert(isShadowAtom(mce,vatom2));
1656 tl_assert(sameKindedAtoms(atom1,vatom1));
1657 tl_assert(sameKindedAtoms(atom2,vatom2));
1658 switch (op) {
1659
1660 /* V128-bit SIMD (SSE2-esque) */
1661
1662 case Iop_ShrN16x8:
1663 case Iop_ShrN32x4:
1664 case Iop_ShrN64x2:
1665 case Iop_SarN16x8:
1666 case Iop_SarN32x4:
1667 case Iop_ShlN16x8:
1668 case Iop_ShlN32x4:
1669 case Iop_ShlN64x2:
1670 /* Same scheme as with all other shifts. */
1671 complainIfUndefined(mce, atom2);
1672 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1673
1674 case Iop_QSub8Ux16:
1675 case Iop_QSub8Sx16:
1676 case Iop_Sub8x16:
1677 case Iop_Min8Ux16:
1678 case Iop_Max8Ux16:
1679 case Iop_CmpGT8Sx16:
1680 case Iop_CmpEQ8x16:
1681 case Iop_Avg8Ux16:
1682 case Iop_QAdd8Ux16:
1683 case Iop_QAdd8Sx16:
1684 case Iop_Add8x16:
1685 return binary8Ix16(mce, vatom1, vatom2);
1686
1687 case Iop_QSub16Ux8:
1688 case Iop_QSub16Sx8:
1689 case Iop_Sub16x8:
1690 case Iop_Mul16x8:
1691 case Iop_MulHi16Sx8:
1692 case Iop_MulHi16Ux8:
1693 case Iop_Min16Sx8:
1694 case Iop_Max16Sx8:
1695 case Iop_CmpGT16Sx8:
1696 case Iop_CmpEQ16x8:
1697 case Iop_Avg16Ux8:
1698 case Iop_QAdd16Ux8:
1699 case Iop_QAdd16Sx8:
1700 case Iop_Add16x8:
1701 return binary16Ix8(mce, vatom1, vatom2);
1702
1703 case Iop_Sub32x4:
1704 case Iop_QSub32Sx4:
1705 case Iop_QSub32Ux4:
1706 case Iop_CmpGT32Sx4:
1707 case Iop_CmpEQ32x4:
1708 case Iop_Add32x4:
1709 case Iop_QAdd32Ux4:
1710 case Iop_QAdd32Sx4:
1711 return binary32Ix4(mce, vatom1, vatom2);
1712
1713 case Iop_Sub64x2:
1714 case Iop_QSub64Ux2:
1715 case Iop_QSub64Sx2:
1716 case Iop_Add64x2:
1717 case Iop_QAdd64Ux2:
1718 case Iop_QAdd64Sx2:
1719 return binary64Ix2(mce, vatom1, vatom2);
1720
1721 case Iop_QNarrowBin32Sto16Sx8:
1722 case Iop_QNarrowBin16Sto8Sx16:
1723 case Iop_QNarrowBin16Sto8Ux16:
1724 return vectorNarrowV128(mce, op, vatom1, vatom2);
1725
1726 case Iop_Sub64Fx2:
1727 case Iop_Mul64Fx2:
1728 case Iop_Min64Fx2:
1729 case Iop_Max64Fx2:
1730 case Iop_Div64Fx2:
1731 case Iop_CmpLT64Fx2:
1732 case Iop_CmpLE64Fx2:
1733 case Iop_CmpEQ64Fx2:
1734 case Iop_Add64Fx2:
1735 return binary64Fx2(mce, vatom1, vatom2);
1736
1737 case Iop_Sub64F0x2:
1738 case Iop_Mul64F0x2:
1739 case Iop_Min64F0x2:
1740 case Iop_Max64F0x2:
1741 case Iop_Div64F0x2:
1742 case Iop_CmpLT64F0x2:
1743 case Iop_CmpLE64F0x2:
1744 case Iop_CmpEQ64F0x2:
1745 case Iop_Add64F0x2:
1746 return binary64F0x2(mce, vatom1, vatom2);
1747
1748 /* V128-bit SIMD (SSE1-esque) */
1749
1750 case Iop_Sub32Fx4:
1751 case Iop_Mul32Fx4:
1752 case Iop_Min32Fx4:
1753 case Iop_Max32Fx4:
1754 case Iop_Div32Fx4:
1755 case Iop_CmpLT32Fx4:
1756 case Iop_CmpLE32Fx4:
1757 case Iop_CmpEQ32Fx4:
1758 case Iop_Add32Fx4:
1759 return binary32Fx4(mce, vatom1, vatom2);
1760
1761 case Iop_Sub32F0x4:
1762 case Iop_Mul32F0x4:
1763 case Iop_Min32F0x4:
1764 case Iop_Max32F0x4:
1765 case Iop_Div32F0x4:
1766 case Iop_CmpLT32F0x4:
1767 case Iop_CmpLE32F0x4:
1768 case Iop_CmpEQ32F0x4:
1769 case Iop_Add32F0x4:
1770 return binary32F0x4(mce, vatom1, vatom2);
1771
1772 /* V128-bit data-steering */
1773 case Iop_SetV128lo32:
1774 case Iop_SetV128lo64:
1775 case Iop_64HLtoV128:
1776 case Iop_InterleaveLO64x2:
1777 case Iop_InterleaveLO32x4:
1778 case Iop_InterleaveLO16x8:
1779 case Iop_InterleaveLO8x16:
1780 case Iop_InterleaveHI64x2:
1781 case Iop_InterleaveHI32x4:
1782 case Iop_InterleaveHI16x8:
1783 case Iop_InterleaveHI8x16:
1784 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1785
1786 /* Scalar floating point */
1787
1788 // case Iop_RoundF64:
1789 case Iop_F64toI64S:
1790 case Iop_I64StoF64:
1791 /* First arg is I32 (rounding mode), second is F64 or I64
1792 (data). */
1793 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1794
1795 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1796 /* Takes two F64 args. */
1797 case Iop_F64toI32S:
1798 case Iop_F64toF32:
1799 /* First arg is I32 (rounding mode), second is F64 (data). */
1800 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1801
1802 case Iop_F64toI16S:
1803 /* First arg is I32 (rounding mode), second is F64 (data). */
1804 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1805
1806 case Iop_ScaleF64:
1807 case Iop_Yl2xF64:
1808 case Iop_Yl2xp1F64:
1809 case Iop_PRemF64:
1810 case Iop_AtanF64:
1811 case Iop_AddF64:
1812 case Iop_DivF64:
1813 case Iop_SubF64:
1814 case Iop_MulF64:
1815 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1816
1817 case Iop_CmpF64:
1818 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1819
1820 /* non-FP after here */
1821
1822 case Iop_DivModU64to32:
1823 case Iop_DivModS64to32:
1824 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1825
1826 case Iop_16HLto32:
1827 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1828 case Iop_32HLto64:
1829 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1830
1831 case Iop_MullS32:
1832 case Iop_MullU32: {
1833 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1834 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1835 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1836 }
1837
1838 case Iop_MullS16:
1839 case Iop_MullU16: {
1840 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1841 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1842 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1843 }
1844
1845 case Iop_MullS8:
1846 case Iop_MullU8: {
1847 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1848 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1849 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1850 }
1851
1852 case Iop_Add32:
1853 # if 0
1854 return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1855 # endif
1856 case Iop_Sub32:
1857 case Iop_Mul32:
1858 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1859
1860 case Iop_Mul16:
1861 case Iop_Add16:
1862 case Iop_Sub16:
1863 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1864
1865 case Iop_Sub8:
1866 case Iop_Add8:
1867 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1868
1869 case Iop_CmpLE32S: case Iop_CmpLE32U:
1870 case Iop_CmpLT32U: case Iop_CmpLT32S:
1871 case Iop_CmpEQ32: case Iop_CmpNE32:
1872 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1873
1874 case Iop_CmpEQ16: case Iop_CmpNE16:
1875 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1876
1877 case Iop_CmpEQ8: case Iop_CmpNE8:
1878 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1879
1880 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1881 /* Complain if the shift amount is undefined. Then simply
1882 shift the first arg's V bits by the real shift amount. */
1883 complainIfUndefined(mce, atom2);
1884 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1885
1886 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1887 /* Same scheme as with 32-bit shifts. */
1888 complainIfUndefined(mce, atom2);
1889 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1890
1891 case Iop_Shl8: case Iop_Shr8:
1892 /* Same scheme as with 32-bit shifts. */
1893 complainIfUndefined(mce, atom2);
1894 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1895
1896 case Iop_Shl64: case Iop_Shr64:
1897 /* Same scheme as with 32-bit shifts. */
1898 complainIfUndefined(mce, atom2);
1899 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1900
1901 case Iop_AndV128:
1902 uifu = mkUifUV128; difd = mkDifDV128;
1903 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1904 case Iop_And64:
1905 uifu = mkUifU64; difd = mkDifD64;
1906 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1907 case Iop_And32:
1908 uifu = mkUifU32; difd = mkDifD32;
1909 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1910 case Iop_And16:
1911 uifu = mkUifU16; difd = mkDifD16;
1912 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1913 case Iop_And8:
1914 uifu = mkUifU8; difd = mkDifD8;
1915 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1916
1917 case Iop_OrV128:
1918 uifu = mkUifUV128; difd = mkDifDV128;
1919 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1920 case Iop_Or64:
1921 uifu = mkUifU64; difd = mkDifD64;
1922 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1923 case Iop_Or32:
1924 uifu = mkUifU32; difd = mkDifD32;
1925 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1926 case Iop_Or16:
1927 uifu = mkUifU16; difd = mkDifD16;
1928 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1929 case Iop_Or8:
1930 uifu = mkUifU8; difd = mkDifD8;
1931 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1932
1933 do_And_Or:
1934 return
1935 assignNew(
1936 mce,
1937 and_or_ty,
1938 difd(mce, uifu(mce, vatom1, vatom2),
1939 difd(mce, improve(mce, atom1, vatom1),
1940 improve(mce, atom2, vatom2) ) ) );
1941
1942 case Iop_Xor8:
1943 return mkUifU8(mce, vatom1, vatom2);
1944 case Iop_Xor16:
1945 return mkUifU16(mce, vatom1, vatom2);
1946 case Iop_Xor32:
1947 return mkUifU32(mce, vatom1, vatom2);
1948 case Iop_Xor64:
1949 return mkUifU64(mce, vatom1, vatom2);
1950 case Iop_XorV128:
1951 return mkUifUV128(mce, vatom1, vatom2);
1952
1953 default:
1954 ppIROp(op);
1955 VG_(tool_panic)("memcheck:expr2vbits_Binop");
1956 }
1957 }
1958
1959
1960 static
expr2vbits_Unop(MCEnv * mce,IROp op,IRAtom * atom)1961 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1962 {
1963 IRAtom* vatom = expr2vbits( mce, atom );
1964 tl_assert(isOriginalAtom(mce,atom));
1965 switch (op) {
1966
1967 case Iop_Sqrt64Fx2:
1968 return unary64Fx2(mce, vatom);
1969
1970 case Iop_Sqrt64F0x2:
1971 return unary64F0x2(mce, vatom);
1972
1973 case Iop_Sqrt32Fx4:
1974 case Iop_RecipEst32Fx4:
1975 return unary32Fx4(mce, vatom);
1976
1977 case Iop_Sqrt32F0x4:
1978 case Iop_RSqrtEst32F0x4:
1979 case Iop_RecipEst32F0x4:
1980 return unary32F0x4(mce, vatom);
1981
1982 case Iop_32UtoV128:
1983 case Iop_64UtoV128:
1984 return assignNew(mce, Ity_V128, unop(op, vatom));
1985
1986 case Iop_F32toF64:
1987 case Iop_I32StoF64:
1988 case Iop_NegF64:
1989 case Iop_SinF64:
1990 case Iop_CosF64:
1991 case Iop_TanF64:
1992 case Iop_SqrtF64:
1993 case Iop_AbsF64:
1994 case Iop_2xm1F64:
1995 return mkPCastTo(mce, Ity_I64, vatom);
1996
1997 case Iop_Clz32:
1998 case Iop_Ctz32:
1999 return mkPCastTo(mce, Ity_I32, vatom);
2000
2001 case Iop_32Sto64:
2002 case Iop_32Uto64:
2003 case Iop_V128to64:
2004 case Iop_V128HIto64:
2005 return assignNew(mce, Ity_I64, unop(op, vatom));
2006
2007 case Iop_64to32:
2008 case Iop_64HIto32:
2009 case Iop_1Uto32:
2010 case Iop_8Uto32:
2011 case Iop_16Uto32:
2012 case Iop_16Sto32:
2013 case Iop_8Sto32:
2014 return assignNew(mce, Ity_I32, unop(op, vatom));
2015
2016 case Iop_8Sto16:
2017 case Iop_8Uto16:
2018 case Iop_32to16:
2019 case Iop_32HIto16:
2020 return assignNew(mce, Ity_I16, unop(op, vatom));
2021
2022 case Iop_1Uto8:
2023 case Iop_16to8:
2024 case Iop_32to8:
2025 return assignNew(mce, Ity_I8, unop(op, vatom));
2026
2027 case Iop_32to1:
2028 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
2029
2030 case Iop_ReinterpF64asI64:
2031 case Iop_ReinterpI64asF64:
2032 case Iop_ReinterpI32asF32:
2033 case Iop_NotV128:
2034 case Iop_Not64:
2035 case Iop_Not32:
2036 case Iop_Not16:
2037 case Iop_Not8:
2038 case Iop_Not1:
2039 return vatom;
2040
2041 default:
2042 ppIROp(op);
2043 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2044 }
2045 }
2046
2047
2048 /* Worker function; do not call directly. */
2049 static
expr2vbits_LDle_WRK(MCEnv * mce,IRType ty,IRAtom * addr,UInt bias)2050 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2051 {
2052 void* helper;
2053 HChar* hname;
2054 IRDirty* di;
2055 IRTemp datavbits;
2056 IRAtom* addrAct;
2057
2058 tl_assert(isOriginalAtom(mce,addr));
2059
2060 /* First, emit a definedness test for the address. This also sets
2061 the address (shadow) to 'defined' following the test. */
2062 complainIfUndefined( mce, addr );
2063
2064 /* Now cook up a call to the relevant helper function, to read the
2065 data V bits from shadow memory. */
2066 ty = shadowType(ty);
2067 switch (ty) {
2068 case Ity_I64: helper = &MC_(helperc_LOADV8);
2069 hname = "MC_(helperc_LOADV8)";
2070 break;
2071 case Ity_I32: helper = &MC_(helperc_LOADV4);
2072 hname = "MC_(helperc_LOADV4)";
2073 break;
2074 case Ity_I16: helper = &MC_(helperc_LOADV2);
2075 hname = "MC_(helperc_LOADV2)";
2076 break;
2077 case Ity_I8: helper = &MC_(helperc_LOADV1);
2078 hname = "MC_(helperc_LOADV1)";
2079 break;
2080 default: ppIRType(ty);
2081 VG_(tool_panic)("memcheck:do_shadow_LDle");
2082 }
2083
2084 /* Generate the actual address into addrAct. */
2085 if (bias == 0) {
2086 addrAct = addr;
2087 } else {
2088 IROp mkAdd;
2089 IRAtom* eBias;
2090 IRType tyAddr = mce->hWordTy;
2091 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2092 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2093 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2094 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2095 }
2096
2097 /* We need to have a place to park the V bits we're just about to
2098 read. */
2099 datavbits = newIRTemp(mce->bb->tyenv, ty);
2100 di = unsafeIRDirty_1_N( datavbits,
2101 1/*regparms*/, hname, helper,
2102 mkIRExprVec_1( addrAct ));
2103 setHelperAnns( mce, di );
2104 stmt( mce->bb, IRStmt_Dirty(di) );
2105
2106 return mkexpr(datavbits);
2107 }
2108
2109
2110 static
expr2vbits_LDle(MCEnv * mce,IRType ty,IRAtom * addr,UInt bias)2111 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2112 {
2113 IRAtom *v64hi, *v64lo;
2114 switch (shadowType(ty)) {
2115 case Ity_I8:
2116 case Ity_I16:
2117 case Ity_I32:
2118 case Ity_I64:
2119 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2120 case Ity_V128:
2121 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2122 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2123 return assignNew( mce,
2124 Ity_V128,
2125 binop(Iop_64HLtoV128, v64hi, v64lo));
2126 default:
2127 VG_(tool_panic)("expr2vbits_LDle");
2128 }
2129 }
2130
2131
2132 static
expr2vbits_ITE(MCEnv * mce,IRAtom * cond,IRAtom * iftrue,IRAtom * iffalse)2133 IRAtom* expr2vbits_ITE ( MCEnv* mce,
2134 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
2135 {
2136 IRAtom *vbitsC, *vbits0, *vbits1;
2137 IRType ty;
2138 /* Given ITE(cond,iftrue,iffalse), generate
2139 ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
2140 That is, steer the V bits like the originals, but trash the
2141 result if the steering value is undefined. This gives
2142 lazy propagation. */
2143 tl_assert(isOriginalAtom(mce, cond));
2144 tl_assert(isOriginalAtom(mce, iftrue));
2145 tl_assert(isOriginalAtom(mce, iffalse));
2146
2147 vbitsC = expr2vbits(mce, cond);
2148 vbits0 = expr2vbits(mce, iffalse);
2149 vbits1 = expr2vbits(mce, iftrue);
2150 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2151
2152 return
2153 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
2154 mkPCastTo(mce, ty, vbitsC) );
2155 }
2156
2157 /* --------- This is the main expression-handling function. --------- */
2158
2159 static
expr2vbits(MCEnv * mce,IRExpr * e)2160 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2161 {
2162 switch (e->tag) {
2163
2164 case Iex_Get:
2165 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2166
2167 case Iex_GetI:
2168 return shadow_GETI( mce, e->Iex.GetI.descr,
2169 e->Iex.GetI.ix, e->Iex.GetI.bias );
2170
2171 case Iex_RdTmp:
2172 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2173
2174 case Iex_Const:
2175 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2176
2177 case Iex_Binop:
2178 return expr2vbits_Binop(
2179 mce,
2180 e->Iex.Binop.op,
2181 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2182 );
2183
2184 case Iex_Unop:
2185 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2186
2187 case Iex_Load:
2188 return expr2vbits_LDle( mce, e->Iex.Load.ty,
2189 e->Iex.Load.addr, 0/*addr bias*/ );
2190
2191 case Iex_CCall:
2192 return mkLazyN( mce, e->Iex.CCall.args,
2193 e->Iex.CCall.retty,
2194 e->Iex.CCall.cee );
2195
2196 case Iex_ITE:
2197 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
2198 e->Iex.ITE.iffalse);
2199
2200 default:
2201 VG_(printf)("\n");
2202 ppIRExpr(e);
2203 VG_(printf)("\n");
2204 VG_(tool_panic)("memcheck: expr2vbits");
2205 }
2206 }
2207
2208 /*------------------------------------------------------------*/
2209 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2210 /*------------------------------------------------------------*/
2211
2212 /* Widen a value to the host word size. */
2213
2214 static
zwidenToHostWord(MCEnv * mce,IRAtom * vatom)2215 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2216 {
2217 IRType ty, tyH;
2218
2219 /* vatom is vbits-value and as such can only have a shadow type. */
2220 tl_assert(isShadowAtom(mce,vatom));
2221
2222 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2223 tyH = mce->hWordTy;
2224
2225 if (tyH == Ity_I32) {
2226 switch (ty) {
2227 case Ity_I32: return vatom;
2228 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2229 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2230 default: goto unhandled;
2231 }
2232 } else {
2233 goto unhandled;
2234 }
2235 unhandled:
2236 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2237 VG_(tool_panic)("zwidenToHostWord");
2238 }
2239
2240
2241 /* Generate a shadow store. addr is always the original address atom.
2242 You can pass in either originals or V-bits for the data atom, but
2243 obviously not both. */
2244
2245 static
do_shadow_STle(MCEnv * mce,IRAtom * addr,UInt bias,IRAtom * data,IRAtom * vdata)2246 void do_shadow_STle ( MCEnv* mce,
2247 IRAtom* addr, UInt bias,
2248 IRAtom* data, IRAtom* vdata )
2249 {
2250 IROp mkAdd;
2251 IRType ty, tyAddr;
2252 IRDirty *di, *diLo64, *diHi64;
2253 IRAtom *addrAct, *addrLo64, *addrHi64;
2254 IRAtom *vdataLo64, *vdataHi64;
2255 IRAtom *eBias, *eBias0, *eBias8;
2256 void* helper = NULL;
2257 HChar* hname = NULL;
2258
2259 tyAddr = mce->hWordTy;
2260 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2261 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2262
2263 di = diLo64 = diHi64 = NULL;
2264 eBias = eBias0 = eBias8 = NULL;
2265 addrAct = addrLo64 = addrHi64 = NULL;
2266 vdataLo64 = vdataHi64 = NULL;
2267
2268 if (data) {
2269 tl_assert(!vdata);
2270 tl_assert(isOriginalAtom(mce, data));
2271 tl_assert(bias == 0);
2272 vdata = expr2vbits( mce, data );
2273 } else {
2274 tl_assert(vdata);
2275 }
2276
2277 tl_assert(isOriginalAtom(mce,addr));
2278 tl_assert(isShadowAtom(mce,vdata));
2279
2280 ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2281
2282 /* First, emit a definedness test for the address. This also sets
2283 the address (shadow) to 'defined' following the test. */
2284 complainIfUndefined( mce, addr );
2285
2286 /* Now decide which helper function to call to write the data V
2287 bits into shadow memory. */
2288 switch (ty) {
2289 case Ity_V128: /* we'll use the helper twice */
2290 case Ity_I64: helper = &MC_(helperc_STOREV8);
2291 hname = "MC_(helperc_STOREV8)";
2292 break;
2293 case Ity_I32: helper = &MC_(helperc_STOREV4);
2294 hname = "MC_(helperc_STOREV4)";
2295 break;
2296 case Ity_I16: helper = &MC_(helperc_STOREV2);
2297 hname = "MC_(helperc_STOREV2)";
2298 break;
2299 case Ity_I8: helper = &MC_(helperc_STOREV1);
2300 hname = "MC_(helperc_STOREV1)";
2301 break;
2302 default: VG_(tool_panic)("memcheck:do_shadow_STle");
2303 }
2304
2305 if (ty == Ity_V128) {
2306
2307 /* V128-bit case */
2308 /* See comment in next clause re 64-bit regparms */
2309 eBias0 = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2310 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2311 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2312 diLo64 = unsafeIRDirty_0_N(
2313 1/*regparms*/, hname, helper,
2314 mkIRExprVec_2( addrLo64, vdataLo64 ));
2315
2316 eBias8 = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2317 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2318 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2319 diHi64 = unsafeIRDirty_0_N(
2320 1/*regparms*/, hname, helper,
2321 mkIRExprVec_2( addrHi64, vdataHi64 ));
2322
2323 setHelperAnns( mce, diLo64 );
2324 setHelperAnns( mce, diHi64 );
2325 stmt( mce->bb, IRStmt_Dirty(diLo64) );
2326 stmt( mce->bb, IRStmt_Dirty(diHi64) );
2327
2328 } else {
2329
2330 /* 8/16/32/64-bit cases */
2331 /* Generate the actual address into addrAct. */
2332 if (bias == 0) {
2333 addrAct = addr;
2334 } else {
2335 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2336 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2337 }
2338
2339 if (ty == Ity_I64) {
2340 /* We can't do this with regparm 2 on 32-bit platforms, since
2341 the back ends aren't clever enough to handle 64-bit
2342 regparm args. Therefore be different. */
2343 di = unsafeIRDirty_0_N(
2344 1/*regparms*/, hname, helper,
2345 mkIRExprVec_2( addrAct, vdata ));
2346 } else {
2347 di = unsafeIRDirty_0_N(
2348 2/*regparms*/, hname, helper,
2349 mkIRExprVec_2( addrAct,
2350 zwidenToHostWord( mce, vdata )));
2351 }
2352 setHelperAnns( mce, di );
2353 stmt( mce->bb, IRStmt_Dirty(di) );
2354 }
2355
2356 }
2357
2358
2359 /* Do lazy pessimistic propagation through a dirty helper call, by
2360 looking at the annotations on it. This is the most complex part of
2361 Memcheck. */
2362
szToITy(Int n)2363 static IRType szToITy ( Int n )
2364 {
2365 switch (n) {
2366 case 1: return Ity_I8;
2367 case 2: return Ity_I16;
2368 case 4: return Ity_I32;
2369 case 8: return Ity_I64;
2370 default: VG_(tool_panic)("szToITy(memcheck)");
2371 }
2372 }
2373
2374 static
do_shadow_Dirty(MCEnv * mce,IRDirty * d)2375 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2376 {
2377 Int i, n, offset, toDo, gSz, gOff;
2378 IRAtom *src, *here, *curr;
2379 IRType tyAddr, tySrc, tyDst;
2380 IRTemp dst;
2381
2382 /* First check the guard. */
2383 complainIfUndefined(mce, d->guard);
2384
2385 /* Now round up all inputs and PCast over them. */
2386 curr = definedOfType(Ity_I32);
2387
2388 /* Inputs: unmasked args */
2389 for (i = 0; d->args[i]; i++) {
2390 if (d->cee->mcx_mask & (1<<i)) {
2391 /* ignore this arg */
2392 } else {
2393 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2394 curr = mkUifU32(mce, here, curr);
2395 }
2396 }
2397
2398 /* Inputs: guest state that we read. */
2399 for (i = 0; i < d->nFxState; i++) {
2400 tl_assert(d->fxState[i].fx != Ifx_None);
2401 if (d->fxState[i].fx == Ifx_Write)
2402 continue;
2403
2404 /* Ignore any sections marked as 'always defined'. */
2405 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2406 if (0)
2407 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2408 d->fxState[i].offset, d->fxState[i].size );
2409 continue;
2410 }
2411
2412 /* This state element is read or modified. So we need to
2413 consider it. If larger than 8 bytes, deal with it in 8-byte
2414 chunks. */
2415 gSz = d->fxState[i].size;
2416 gOff = d->fxState[i].offset;
2417 tl_assert(gSz > 0);
2418 while (True) {
2419 if (gSz == 0) break;
2420 n = gSz <= 8 ? gSz : 8;
2421 /* update 'curr' with UifU of the state slice
2422 gOff .. gOff+n-1 */
2423 tySrc = szToITy( n );
2424 src = assignNew( mce, tySrc,
2425 shadow_GET(mce, gOff, tySrc ) );
2426 here = mkPCastTo( mce, Ity_I32, src );
2427 curr = mkUifU32(mce, here, curr);
2428 gSz -= n;
2429 gOff += n;
2430 }
2431
2432 }
2433
2434 /* Inputs: memory. First set up some info needed regardless of
2435 whether we're doing reads or writes. */
2436 tyAddr = Ity_INVALID;
2437
2438 if (d->mFx != Ifx_None) {
2439 /* Because we may do multiple shadow loads/stores from the same
2440 base address, it's best to do a single test of its
2441 definedness right now. Post-instrumentation optimisation
2442 should remove all but this test. */
2443 tl_assert(d->mAddr);
2444 complainIfUndefined(mce, d->mAddr);
2445
2446 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2447 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2448 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2449 }
2450
2451 /* Deal with memory inputs (reads or modifies) */
2452 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2453 offset = 0;
2454 toDo = d->mSize;
2455 /* chew off 32-bit chunks */
2456 while (toDo >= 4) {
2457 here = mkPCastTo(
2458 mce, Ity_I32,
2459 expr2vbits_LDle ( mce, Ity_I32,
2460 d->mAddr, d->mSize - toDo )
2461 );
2462 curr = mkUifU32(mce, here, curr);
2463 toDo -= 4;
2464 }
2465 /* chew off 16-bit chunks */
2466 while (toDo >= 2) {
2467 here = mkPCastTo(
2468 mce, Ity_I32,
2469 expr2vbits_LDle ( mce, Ity_I16,
2470 d->mAddr, d->mSize - toDo )
2471 );
2472 curr = mkUifU32(mce, here, curr);
2473 toDo -= 2;
2474 }
2475 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2476 }
2477
2478 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2479 all the inputs to the helper. Now we need to re-distribute the
2480 results to all destinations. */
2481
2482 /* Outputs: the destination temporary, if there is one. */
2483 if (d->tmp != IRTemp_INVALID) {
2484 dst = findShadowTmp(mce, d->tmp);
2485 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2486 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2487 }
2488
2489 /* Outputs: guest state that we write or modify. */
2490 for (i = 0; i < d->nFxState; i++) {
2491 tl_assert(d->fxState[i].fx != Ifx_None);
2492 if (d->fxState[i].fx == Ifx_Read)
2493 continue;
2494 /* Ignore any sections marked as 'always defined'. */
2495 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2496 continue;
2497 /* This state element is written or modified. So we need to
2498 consider it. If larger than 8 bytes, deal with it in 8-byte
2499 chunks. */
2500 gSz = d->fxState[i].size;
2501 gOff = d->fxState[i].offset;
2502 tl_assert(gSz > 0);
2503 while (True) {
2504 if (gSz == 0) break;
2505 n = gSz <= 8 ? gSz : 8;
2506 /* Write suitably-casted 'curr' to the state slice
2507 gOff .. gOff+n-1 */
2508 tyDst = szToITy( n );
2509 do_shadow_PUT( mce, gOff,
2510 NULL, /* original atom */
2511 mkPCastTo( mce, tyDst, curr ) );
2512 gSz -= n;
2513 gOff += n;
2514 }
2515 }
2516
2517 /* Outputs: memory that we write or modify. */
2518 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2519 offset = 0;
2520 toDo = d->mSize;
2521 /* chew off 32-bit chunks */
2522 while (toDo >= 4) {
2523 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2524 NULL, /* original data */
2525 mkPCastTo( mce, Ity_I32, curr ) );
2526 toDo -= 4;
2527 }
2528 /* chew off 16-bit chunks */
2529 while (toDo >= 2) {
2530 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2531 NULL, /* original data */
2532 mkPCastTo( mce, Ity_I16, curr ) );
2533 toDo -= 2;
2534 }
2535 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2536 }
2537
2538 }
2539
2540
2541 /*------------------------------------------------------------*/
2542 /*--- Memcheck main ---*/
2543 /*------------------------------------------------------------*/
2544
isBogusAtom(IRAtom * at)2545 static Bool isBogusAtom ( IRAtom* at )
2546 {
2547 ULong n = 0;
2548 IRConst* con;
2549 tl_assert(isIRAtom(at));
2550 if (at->tag == Iex_RdTmp)
2551 return False;
2552 tl_assert(at->tag == Iex_Const);
2553 con = at->Iex.Const.con;
2554 switch (con->tag) {
2555 case Ico_U8: n = (ULong)con->Ico.U8; break;
2556 case Ico_U16: n = (ULong)con->Ico.U16; break;
2557 case Ico_U32: n = (ULong)con->Ico.U32; break;
2558 case Ico_U64: n = (ULong)con->Ico.U64; break;
2559 default: ppIRExpr(at); tl_assert(0);
2560 }
2561 /* VG_(printf)("%llx\n", n); */
2562 return (n == 0xFEFEFEFF
2563 || n == 0x80808080
2564 || n == 0x1010101
2565 || n == 1010100);
2566 }
2567
2568 __attribute__((unused))
checkForBogusLiterals(IRStmt * st)2569 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2570 {
2571 Int i;
2572 IRExpr* e;
2573 switch (st->tag) {
2574 case Ist_WrTmp:
2575 e = st->Ist.WrTmp.data;
2576 switch (e->tag) {
2577 case Iex_Get:
2578 case Iex_RdTmp:
2579 return False;
2580 case Iex_Unop:
2581 return isBogusAtom(e->Iex.Unop.arg);
2582 case Iex_Binop:
2583 return isBogusAtom(e->Iex.Binop.arg1)
2584 || isBogusAtom(e->Iex.Binop.arg2);
2585 case Iex_ITE:
2586 return isBogusAtom(e->Iex.ITE.cond)
2587 || isBogusAtom(e->Iex.ITE.iftrue)
2588 || isBogusAtom(e->Iex.ITE.iffalse);
2589 case Iex_Load:
2590 return isBogusAtom(e->Iex.Load.addr);
2591 case Iex_CCall:
2592 for (i = 0; e->Iex.CCall.args[i]; i++)
2593 if (isBogusAtom(e->Iex.CCall.args[i]))
2594 return True;
2595 return False;
2596 default:
2597 goto unhandled;
2598 }
2599 case Ist_Put:
2600 return isBogusAtom(st->Ist.Put.data);
2601 case Ist_Store:
2602 return isBogusAtom(st->Ist.Store.addr)
2603 || isBogusAtom(st->Ist.Store.data);
2604 case Ist_Exit:
2605 return isBogusAtom(st->Ist.Exit.guard);
2606 default:
2607 unhandled:
2608 ppIRStmt(st);
2609 VG_(tool_panic)("hasBogusLiterals");
2610 }
2611 }
2612
mc_instrument(void * closureV,IRSB * bb_in,VexGuestLayout * layout,VexGuestExtents * vge,IRType gWordTy,IRType hWordTy)2613 IRSB* mc_instrument ( void* closureV,
2614 IRSB* bb_in, VexGuestLayout* layout,
2615 VexGuestExtents* vge,
2616 IRType gWordTy, IRType hWordTy )
2617 {
2618 Bool verboze = False; //True;
2619
2620 /* Bool hasBogusLiterals = False; */
2621
2622 Int i, j, first_stmt;
2623 IRStmt* st;
2624 MCEnv mce;
2625
2626 /* Set up BB */
2627 IRSB* bb = emptyIRSB();
2628 bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv);
2629 bb->next = deepCopyIRExpr(bb_in->next);
2630 bb->jumpkind = bb_in->jumpkind;
2631
2632 /* Set up the running environment. Only .bb is modified as we go
2633 along. */
2634 mce.bb = bb;
2635 mce.layout = layout;
2636 mce.n_originalTmps = bb->tyenv->types_used;
2637 mce.hWordTy = hWordTy;
2638 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2639 for (i = 0; i < mce.n_originalTmps; i++)
2640 mce.tmpMap[i] = IRTemp_INVALID;
2641
2642 /* Iterate over the stmts. */
2643
2644 for (i = 0; i < bb_in->stmts_used; i++) {
2645 st = bb_in->stmts[i];
2646 if (!st) continue;
2647
2648 tl_assert(isFlatIRStmt(st));
2649
2650 /*
2651 if (!hasBogusLiterals) {
2652 hasBogusLiterals = checkForBogusLiterals(st);
2653 if (hasBogusLiterals) {
2654 VG_(printf)("bogus: ");
2655 ppIRStmt(st);
2656 VG_(printf)("\n");
2657 }
2658 }
2659 */
2660 first_stmt = bb->stmts_used;
2661
2662 if (verboze) {
2663 ppIRStmt(st);
2664 VG_(printf)("\n\n");
2665 }
2666
2667 switch (st->tag) {
2668
2669 case Ist_WrTmp:
2670 assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2671 expr2vbits( &mce, st->Ist.WrTmp.data) );
2672 break;
2673
2674 case Ist_Put:
2675 do_shadow_PUT( &mce,
2676 st->Ist.Put.offset,
2677 st->Ist.Put.data,
2678 NULL /* shadow atom */ );
2679 break;
2680
2681 case Ist_PutI:
2682 do_shadow_PUTI( &mce,
2683 st->Ist.PutI.details->descr,
2684 st->Ist.PutI.details->ix,
2685 st->Ist.PutI.details->bias,
2686 st->Ist.PutI.details->data );
2687 break;
2688
2689 case Ist_Store:
2690 do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2691 st->Ist.Store.data,
2692 NULL /* shadow data */ );
2693 break;
2694
2695 case Ist_Exit:
2696 /* if (!hasBogusLiterals) */
2697 complainIfUndefined( &mce, st->Ist.Exit.guard );
2698 break;
2699
2700 case Ist_Dirty:
2701 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2702 break;
2703
2704 case Ist_IMark:
2705 case Ist_NoOp:
2706 break;
2707
2708 default:
2709 VG_(printf)("\n");
2710 ppIRStmt(st);
2711 VG_(printf)("\n");
2712 VG_(tool_panic)("memcheck: unhandled IRStmt");
2713
2714 } /* switch (st->tag) */
2715
2716 if (verboze) {
2717 for (j = first_stmt; j < bb->stmts_used; j++) {
2718 VG_(printf)(" ");
2719 ppIRStmt(bb->stmts[j]);
2720 VG_(printf)("\n");
2721 }
2722 VG_(printf)("\n");
2723 }
2724
2725 addStmtToIRSB(bb, st);
2726
2727 }
2728
2729 /* Now we need to complain if the jump target is undefined. */
2730 first_stmt = bb->stmts_used;
2731
2732 if (verboze) {
2733 VG_(printf)("bb->next = ");
2734 ppIRExpr(bb->next);
2735 VG_(printf)("\n\n");
2736 }
2737
2738 complainIfUndefined( &mce, bb->next );
2739
2740 if (verboze) {
2741 for (j = first_stmt; j < bb->stmts_used; j++) {
2742 VG_(printf)(" ");
2743 ppIRStmt(bb->stmts[j]);
2744 VG_(printf)("\n");
2745 }
2746 VG_(printf)("\n");
2747 }
2748
2749 return bb;
2750 }
2751 #endif /* UNUSED */
2752
2753 /*--------------------------------------------------------------------*/
2754 /*--- end test_main.c ---*/
2755 /*--------------------------------------------------------------------*/
2756