1
2 /*---------------------------------------------------------------*/
3 /*--- begin test_main.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2013 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <assert.h>
39 #include <string.h>
40
41 #include "libvex_basictypes.h"
42 #include "libvex.h"
43
44 #include "test_main.h"
45
46
47 /*---------------------------------------------------------------*/
48 /*--- Test ---*/
49 /*---------------------------------------------------------------*/
50
51
52 __attribute__ ((noreturn))
53 static
54 void failure_exit ( void )
55 {
56 fprintf(stdout, "VEX did failure_exit. Bye.\n");
57 exit(1);
58 }
59
60 static
61 void log_bytes ( HChar* bytes, Int nbytes )
62 {
63 fwrite ( bytes, 1, nbytes, stdout );
64 }
65
66 #define N_LINEBUF 10000
67 static HChar linebuf[N_LINEBUF];
68
69 #define N_ORIGBUF 10000
70 #define N_TRANSBUF 5000
71
72 static UChar origbuf[N_ORIGBUF];
73 static UChar transbuf[N_TRANSBUF];
74
75 static Bool verbose = True;
76
77 /* Forwards */
78 #if 1 /* UNUSED */
79 //static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
80 static
81 IRSB* mc_instrument ( void* closureV,
82 IRSB* bb_in, VexGuestLayout* layout,
83 VexGuestExtents* vge,
84 IRType gWordTy, IRType hWordTy );
85 #endif
86
87 static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
88 return False;
89 }
90 static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
91 return 0;
92 }
93
94 int main ( int argc, char** argv )
95 {
96 FILE* f;
97 Int i;
98 UInt u, sum;
99 Addr32 orig_addr;
100 Int bb_number, n_bbs_done = 0;
101 Int orig_nbytes, trans_used;
102 VexTranslateResult tres;
103 VexControl vcon;
104 VexGuestExtents vge;
105 VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm;
106 VexAbiInfo vbi;
107 VexTranslateArgs vta;
108
109 if (argc != 2) {
110 fprintf(stderr, "usage: vex file.orig\n");
111 exit(1);
112 }
113 f = fopen(argv[1], "r");
114 if (!f) {
115 fprintf(stderr, "can't open `%s'\n", argv[1]);
116 exit(1);
117 }
118
119 /* Run with default params. However, we can't allow bb chasing
120 since that causes the front end to get segfaults when it tries
121 to read code outside the initial BB we hand it. So when calling
122 LibVEX_Translate, send in a chase-into predicate that always
123 returns False. */
124 LibVEX_default_VexControl ( &vcon );
125 vcon.iropt_level = 2;
126 vcon.guest_max_insns = 60;
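/* (Assumption: iropt_level 2 selects full IR optimisation and
   guest_max_insns caps how many guest instructions may go into a
   single superblock; both appear simply to restate the defaults set
   by LibVEX_default_VexControl above.) */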
127
128 LibVEX_Init ( &failure_exit, &log_bytes,
129 1, /* debug_paranoia */
130 TEST_VSUPPORT, /* valgrind support */
131 &vcon );
132
133
134 while (!feof(f)) {
135
136 __attribute__((unused))
137 char* unused1 = fgets(linebuf, N_LINEBUF,f);
138 if (linebuf[0] == 0) continue;
139 if (linebuf[0] != '.') continue;
140
141 if (n_bbs_done == TEST_N_BBS) break;
142 n_bbs_done++;
143
144 /* first line is: . bb-number bb-addr n-bytes */
145 assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
146 & bb_number,
147 & orig_addr, & orig_nbytes ));
148 assert(orig_nbytes >= 1);
149 assert(!feof(f));
150 __attribute__((unused))
151 char* unused2 = fgets(linebuf, N_LINEBUF,f);
152 assert(linebuf[0] == '.');
153
154 /* second line is: . byte byte byte etc */
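/* For illustration only, a hypothetical pair of input lines in this
   format might be:
      . 42 8048794 5
      . 55 89 e5 5d c3
   i.e. a header giving bb-number, bb-addr (hex) and n-bytes, followed
   by that many instruction bytes as space-separated hex pairs --
   which is exactly what the surrounding sscanf calls expect. */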
155 if (verbose)
156 printf("============ Basic Block %d, Done %d, "
157 "Start %x, nbytes %2d ============",
158 bb_number, n_bbs_done-1, orig_addr, orig_nbytes);
159
160 /* thumb ITstate analysis needs to examine the 18 bytes
161 preceding the first instruction. So let's leave the first 18
162 zeroed out. */
163 memset(origbuf, 0, sizeof(origbuf));
164
165 assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
166 for (i = 0; i < orig_nbytes; i++) {
167 assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
168 origbuf[18+ i] = (UChar)u;
169 }
170
171 /* FIXME: put sensible values into the .hwcaps fields */
172 LibVEX_default_VexArchInfo(&vai_x86);
173 vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
174 | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
175
176 LibVEX_default_VexArchInfo(&vai_amd64);
177 vai_amd64.hwcaps = 0;
178
179 LibVEX_default_VexArchInfo(&vai_ppc32);
180 vai_ppc32.hwcaps = 0;
181 vai_ppc32.ppc_icache_line_szB = 128;
182
183 LibVEX_default_VexArchInfo(&vai_arm);
184 vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;
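/* The "| 7" is taken to be the base architecture level (ARMv7)
   encoded in the low bits of the ARM hwcaps word, alongside VFP3 and
   NEON; this reading is an assumption based on how the
   VEX_HWCAPS_ARM_* values are laid out in libvex.h. */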
185
186 LibVEX_default_VexAbiInfo(&vbi);
187 vbi.guest_stack_redzone_size = 128;
188
189 /* ----- Set up args for LibVEX_Translate ----- */
190
191 vta.abiinfo_both = vbi;
192 vta.guest_bytes = &origbuf[18];
193 vta.guest_bytes_addr = (Addr64)orig_addr;
194 vta.callback_opaque = NULL;
195 vta.chase_into_ok = chase_into_not_ok;
196 vta.guest_extents = &vge;
197 vta.host_bytes = transbuf;
198 vta.host_bytes_size = N_TRANSBUF;
199 vta.host_bytes_used = &trans_used;
200
201 #if 0 /* ppc32 -> ppc32 */
202 vta.arch_guest = VexArchPPC32;
203 vta.archinfo_guest = vai_ppc32;
204 vta.arch_host = VexArchPPC32;
205 vta.archinfo_host = vai_ppc32;
206 #endif
207 #if 0 /* amd64 -> amd64 */
208 vta.arch_guest = VexArchAMD64;
209 vta.archinfo_guest = vai_amd64;
210 vta.arch_host = VexArchAMD64;
211 vta.archinfo_host = vai_amd64;
212 #endif
213 #if 0 /* x86 -> x86 */
214 vta.arch_guest = VexArchX86;
215 vta.archinfo_guest = vai_x86;
216 vta.arch_host = VexArchX86;
217 vta.archinfo_host = vai_x86;
218 #endif
219 #if 1 /* arm -> arm */
220 vta.arch_guest = VexArchARM;
221 vta.archinfo_guest = vai_arm;
222 vta.arch_host = VexArchARM;
223 vta.archinfo_host = vai_arm;
224 /* ARM/Thumb-only hacks, needed to keep the ITstate
225 analyser in the front end happy. */
226 vta.guest_bytes = &origbuf[18 +1];
227 vta.guest_bytes_addr = (Addr64)(&origbuf[18 +1]);
228 #endif
229
230 #if 1 /* no instrumentation */
231 vta.instrument1 = NULL;
232 vta.instrument2 = NULL;
233 #endif
234 #if 0 /* addrcheck */
235 vta.instrument1 = ac_instrument;
236 vta.instrument2 = NULL;
237 #endif
238 #if 0 /* memcheck */
239 vta.instrument1 = mc_instrument;
240 vta.instrument2 = NULL;
241 #endif
242 vta.needs_self_check = needs_self_check;
243 vta.preamble_function = NULL;
244 vta.traceflags = TEST_FLAGS;
245 vta.addProfInc = False;
246 vta.sigill_diag = True;
247
248 vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
249 vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
250 vta.disp_cp_xindir = (void*)0x1234567A;
251 vta.disp_cp_xassisted = (void*)0x1234567B;
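/* The dispatcher entry points above are deliberately bogus,
   easily-recognisable constants: this harness only exercises
   translation and never runs the generated code, so (presumably) they
   only need to be distinct non-NULL addresses for the chaining code
   to be emitted against. */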
252
253 vta.finaltidy = NULL;
254
255 for (i = 0; i < TEST_N_ITERS; i++)
256 tres = LibVEX_Translate ( &vta );
257
258 if (tres.status != VexTransOK)
259 printf("\ntres = %d\n", (Int)tres.status);
260 assert(tres.status == VexTransOK);
261 assert(tres.n_sc_extents == 0);
262 assert(vge.n_used == 1);
263 assert((UInt)(vge.len[0]) == orig_nbytes);
264
265 sum = 0;
266 for (i = 0; i < trans_used; i++)
267 sum += (UInt)transbuf[i];
268 printf ( " %6.2f ... %u\n",
269 (double)trans_used / (double)vge.len[0], sum );
270 }
271
272 fclose(f);
273 printf("\n");
274 LibVEX_ShowAllocStats();
275
276 return 0;
277 }
278
279 //////////////////////////////////////////////////////////////////////
280 //////////////////////////////////////////////////////////////////////
281 //////////////////////////////////////////////////////////////////////
282 //////////////////////////////////////////////////////////////////////
283 //////////////////////////////////////////////////////////////////////
284 //////////////////////////////////////////////////////////////////////
285 //////////////////////////////////////////////////////////////////////
286 //////////////////////////////////////////////////////////////////////
287
288 #if 0 /* UNUSED */
289
290 static
291 __attribute((noreturn))
292 void panic ( HChar* s )
293 {
294 printf("\npanic: %s\n", s);
295 failure_exit();
296 }
297
298 static
299 IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
300 {
301 /* Use this rather than eg. -1 because it's a UInt. */
302 #define INVALID_DATA_SIZE 999999
303
304 Int i;
305 Int sz;
306 IRCallee* helper;
307 IRStmt* st;
308 IRExpr* data;
309 IRExpr* addr;
310 Bool needSz;
311
312 /* Set up BB */
313 IRSB* bb = emptyIRSB();
314 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
315 bb->next = dopyIRExpr(bb_in->next);
316 bb->jumpkind = bb_in->jumpkind;
317
318 /* No loads to consider in ->next. */
319 assert(isIRAtom(bb_in->next));
320
321 for (i = 0; i < bb_in->stmts_used; i++) {
322 st = bb_in->stmts[i];
323 if (!st) continue;
324
325 switch (st->tag) {
326
327 case Ist_Tmp:
328 data = st->Ist.Tmp.data;
329 if (data->tag == Iex_LDle) {
330 addr = data->Iex.LDle.addr;
331 sz = sizeofIRType(data->Iex.LDle.ty);
332 needSz = False;
333 switch (sz) {
334 case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
335 (void*)0x12345601); break;
336 case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
337 (void*)0x12345602); break;
338 case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
339 (void*)0x12345603); break;
340 default: helper = mkIRCallee(0, "ac_helperc_LOADN",
341 (void*)0x12345604);
342 needSz = True; break;
343 }
344 if (needSz) {
345 addStmtToIRSB(
346 bb,
347 IRStmt_Dirty(
348 unsafeIRDirty_0_N( helper->regparms,
349 helper->name, helper->addr,
350 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
351 ));
352 } else {
353 addStmtToIRSB(
354 bb,
355 IRStmt_Dirty(
356 unsafeIRDirty_0_N( helper->regparms,
357 helper->name, helper->addr,
358 mkIRExprVec_1(addr) )
359 ));
360 }
361 }
362 break;
363
364 case Ist_STle:
365 data = st->Ist.STle.data;
366 addr = st->Ist.STle.addr;
367 assert(isIRAtom(data));
368 assert(isIRAtom(addr));
369 sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
370 needSz = False;
371 switch (sz) {
372 case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
373 (void*)0x12345605); break;
374 case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
375 (void*)0x12345606); break;
376 case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
377 (void*)0x12345607); break;
378 default: helper = mkIRCallee(0, "ac_helperc_STOREN",
379 (void*)0x12345608);
380 needSz = True; break;
381 }
382 if (needSz) {
383 addStmtToIRSB(
384 bb,
385 IRStmt_Dirty(
386 unsafeIRDirty_0_N( helper->regparms,
387 helper->name, helper->addr,
388 mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
389 ));
390 } else {
391 addStmtToIRSB(
392 bb,
393 IRStmt_Dirty(
394 unsafeIRDirty_0_N( helper->regparms,
395 helper->name, helper->addr,
396 mkIRExprVec_1(addr) )
397 ));
398 }
399 break;
400
401 case Ist_Put:
402 assert(isIRAtom(st->Ist.Put.data));
403 break;
404
405 case Ist_PutI:
406 assert(isIRAtom(st->Ist.PutI.ix));
407 assert(isIRAtom(st->Ist.PutI.data));
408 break;
409
410 case Ist_Exit:
411 assert(isIRAtom(st->Ist.Exit.guard));
412 break;
413
414 case Ist_Dirty:
415 /* If the call doesn't interact with memory, we ain't
416 interested. */
417 if (st->Ist.Dirty.details->mFx == Ifx_None)
418 break;
419 goto unhandled;
420
421 default:
422 unhandled:
423 printf("\n");
424 ppIRStmt(st);
425 printf("\n");
426 panic("addrcheck: unhandled IRStmt");
427 }
428
429 addStmtToIRSB( bb, dopyIRStmt(st));
430 }
431
432 return bb;
433 }
434 #endif /* UNUSED */
435
436 //////////////////////////////////////////////////////////////////////
437 //////////////////////////////////////////////////////////////////////
438 //////////////////////////////////////////////////////////////////////
439 //////////////////////////////////////////////////////////////////////
440 //////////////////////////////////////////////////////////////////////
441 //////////////////////////////////////////////////////////////////////
442 //////////////////////////////////////////////////////////////////////
443 //////////////////////////////////////////////////////////////////////
444
445 #if 1 /* UNUSED */
446
447 static
448 __attribute((noreturn))
449 void panic ( HChar* s )
450 {
451 printf("\npanic: %s\n", s);
452 failure_exit();
453 }
454
455 #define tl_assert(xxx) assert(xxx)
456 #define VG_(xxxx) xxxx
457 #define tool_panic(zzz) panic(zzz)
458 #define MC_(zzzz) MC_##zzzz
459 #define TL_(zzzz) SK_##zzzz
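/* The macros above, together with the empty MC_helperc_* stubs below,
   provide just enough scaffolding for the copy of memcheck's
   instrumentation code later in this file to compile stand-alone in
   this test harness, without linking against the Valgrind core. */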
460
461
462 static void MC_helperc_complain_undef ( void );
463 static void MC_helperc_LOADV8 ( void );
464 static void MC_helperc_LOADV4 ( void );
465 static void MC_helperc_LOADV2 ( void );
466 static void MC_helperc_LOADV1 ( void );
467 static void MC_helperc_STOREV8( void );
468 static void MC_helperc_STOREV4( void );
469 static void MC_helperc_STOREV2( void );
470 static void MC_helperc_STOREV1( void );
471 static void MC_helperc_value_check0_fail( void );
472 static void MC_helperc_value_check1_fail( void );
473 static void MC_helperc_value_check4_fail( void );
474
475 static void MC_helperc_complain_undef ( void ) { }
476 static void MC_helperc_LOADV8 ( void ) { }
477 static void MC_helperc_LOADV4 ( void ) { }
478 static void MC_helperc_LOADV2 ( void ) { }
479 static void MC_helperc_LOADV1 ( void ) { }
480 static void MC_helperc_STOREV8( void ) { }
481 static void MC_helperc_STOREV4( void ) { }
482 static void MC_helperc_STOREV2( void ) { }
483 static void MC_helperc_STOREV1( void ) { }
484 static void MC_helperc_value_check0_fail( void ) { }
485 static void MC_helperc_value_check1_fail( void ) { }
486 static void MC_helperc_value_check4_fail( void ) { }
487
488
489 /*--------------------------------------------------------------------*/
490 /*--- Instrument IR to perform memory checking operations. ---*/
491 /*--- mc_translate.c ---*/
492 /*--------------------------------------------------------------------*/
493
494 /*
495 This file is part of MemCheck, a heavyweight Valgrind tool for
496 detecting memory errors.
497
498 Copyright (C) 2000-2013 Julian Seward
499 jseward@acm.org
500
501 This program is free software; you can redistribute it and/or
502 modify it under the terms of the GNU General Public License as
503 published by the Free Software Foundation; either version 2 of the
504 License, or (at your option) any later version.
505
506 This program is distributed in the hope that it will be useful, but
507 WITHOUT ANY WARRANTY; without even the implied warranty of
508 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
509 General Public License for more details.
510
511 You should have received a copy of the GNU General Public License
512 along with this program; if not, write to the Free Software
513 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
514 02111-1307, USA.
515
516 The GNU General Public License is contained in the file COPYING.
517 */
518
519 //#include "mc_include.h"
520
521
522 /*------------------------------------------------------------*/
523 /*--- Forward decls ---*/
524 /*------------------------------------------------------------*/
525
526 struct _MCEnv;
527
528 static IRType shadowType ( IRType ty );
529 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
530
531
532 /*------------------------------------------------------------*/
533 /*--- Memcheck running state, and tmp management. ---*/
534 /*------------------------------------------------------------*/
535
536 /* Carries around state during memcheck instrumentation. */
537 typedef
538 struct _MCEnv {
539 /* MODIFIED: the bb being constructed. IRStmts are added. */
540 IRSB* bb;
541
542 /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
543 original temps to their current shadow temp.
544 Initially all entries are IRTemp_INVALID. Entries are added
545 lazily since many original temps are not used due to
546 optimisation prior to instrumentation. Note that floating
547 point original tmps are shadowed by integer tmps of the same
548 size, and Bit-typed original tmps are shadowed by the type
549 Ity_I8. See comment below. */
550 IRTemp* tmpMap;
551 Int n_originalTmps; /* for range checking */
552
553 /* READONLY: the guest layout. This indicates which parts of
554 the guest state should be regarded as 'always defined'. */
555 VexGuestLayout* layout;
556 /* READONLY: the host word type. Needed for constructing
557 arguments of type 'HWord' to be passed to helper functions.
558 Ity_I32 or Ity_I64 only. */
559 IRType hWordTy;
560 }
561 MCEnv;
562
563 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
564 demand), as they are encountered. This is for two reasons.
565
566 (1) (less important reason): Many original tmps are unused due to
567 initial IR optimisation, and we do not want to waste space in tables
568 tracking them.
569
570 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
571 table indexed [0 .. n_types-1], which gives the current shadow for
572 each original tmp, or INVALID_IRTEMP if none is so far assigned.
573 It is necessary to support making multiple assignments to a shadow
574 -- specifically, after testing a shadow for definedness, it needs
575 to be made defined. But IR's SSA property disallows this.
576
577 (2) (more important reason): Therefore, when a shadow needs to get
578 a new value, a new temporary is created, the value is assigned to
579 that, and the tmpMap is updated to reflect the new binding.
580
581 A corollary is that if the tmpMap maps a given tmp to
582 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
583 there's a read-before-write error in the original tmps. The IR
584 sanity checker should catch all such anomalies, however.
585 */
586
587 /* Find the tmp currently shadowing the given original tmp. If none
588 so far exists, allocate one. */
589 static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
590 {
591 tl_assert(orig < mce->n_originalTmps);
592 if (mce->tmpMap[orig] == IRTemp_INVALID) {
593 mce->tmpMap[orig]
594 = newIRTemp(mce->bb->tyenv,
595 shadowType(mce->bb->tyenv->types[orig]));
596 }
597 return mce->tmpMap[orig];
598 }
599
600 /* Allocate a new shadow for the given original tmp. This means any
601 previous shadow is abandoned. This is needed because it is
602 necessary to give a new value to a shadow once it has been tested
603 for undefinedness, but unfortunately IR's SSA property disallows
604 this. Instead we must abandon the old shadow, allocate a new one
605 and use that instead. */
606 static void newShadowTmp ( MCEnv* mce, IRTemp orig )
607 {
608 tl_assert(orig < mce->n_originalTmps);
609 mce->tmpMap[orig]
610 = newIRTemp(mce->bb->tyenv,
611 shadowType(mce->bb->tyenv->types[orig]));
612 }
613
614
615 /*------------------------------------------------------------*/
616 /*--- IRAtoms -- a subset of IRExprs ---*/
617 /*------------------------------------------------------------*/
618
619 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
620 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
621 input, most of this code deals in atoms. Usefully, a value atom
622 always has a V-value which is also an atom: constants are shadowed
623 by constants, and temps are shadowed by the corresponding shadow
624 temporary. */
625
626 typedef IRExpr IRAtom;
627
628 /* (used for sanity checks only): is this an atom which looks
629 like it's from original code? */
630 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
631 {
632 if (a1->tag == Iex_Const)
633 return True;
634 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
635 return True;
636 return False;
637 }
638
639 /* (used for sanity checks only): is this an atom which looks
640 like it's from shadow code? */
641 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
642 {
643 if (a1->tag == Iex_Const)
644 return True;
645 if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
646 return True;
647 return False;
648 }
649
650 /* (used for sanity checks only): check that both args are atoms and
651 are identically-kinded. */
652 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
653 {
654 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
655 return True;
656 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
657 return True;
658 return False;
659 }
660
661
662 /*------------------------------------------------------------*/
663 /*--- Type management ---*/
664 /*------------------------------------------------------------*/
665
666 /* Shadow state is always accessed using integer types. This returns
667 an integer type with the same size (as per sizeofIRType) as the
668 given type. The only valid shadow types are Bit, I8, I16, I32,
669 I64, V128. */
670
671 static IRType shadowType ( IRType ty )
672 {
673 switch (ty) {
674 case Ity_I1:
675 case Ity_I8:
676 case Ity_I16:
677 case Ity_I32:
678 case Ity_I64: return ty;
679 case Ity_F32: return Ity_I32;
680 case Ity_F64: return Ity_I64;
681 case Ity_V128: return Ity_V128;
682 default: ppIRType(ty);
683 VG_(tool_panic)("memcheck:shadowType");
684 }
685 }
686
687 /* Produce a 'defined' value of the given shadow type. Should only be
688 supplied shadow types (I1/I8/I16/I32/I64/V128). */
689 static IRExpr* definedOfType ( IRType ty ) {
690 switch (ty) {
691 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
692 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
693 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
694 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
695 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
696 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
697 default: VG_(tool_panic)("memcheck:definedOfType");
698 }
699 }
700
701
702 /*------------------------------------------------------------*/
703 /*--- Constructing IR fragments ---*/
704 /*------------------------------------------------------------*/
705
706 /* assign value to tmp */
707 #define assign(_bb,_tmp,_expr) \
708 addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))
709
710 /* add stmt to a bb */
711 #define stmt(_bb,_stmt) \
712 addStmtToIRSB((_bb), (_stmt))
713
714 /* build various kinds of expressions */
715 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
716 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
717 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
718 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
719 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
720 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
721 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
722 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
723
724 /* bind the given expression to a new temporary, and return the
725 temporary. This effectively converts an arbitrary expression into
726 an atom. */
727 static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
728 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
729 assign(mce->bb, t, e);
730 return mkexpr(t);
731 }
732
733
734 /*------------------------------------------------------------*/
735 /*--- Constructing definedness primitive ops ---*/
736 /*------------------------------------------------------------*/
737
738 /* --------- Defined-if-either-defined --------- */
739
740 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
741 tl_assert(isShadowAtom(mce,a1));
742 tl_assert(isShadowAtom(mce,a2));
743 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
744 }
745
746 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
747 tl_assert(isShadowAtom(mce,a1));
748 tl_assert(isShadowAtom(mce,a2));
749 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
750 }
751
752 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
753 tl_assert(isShadowAtom(mce,a1));
754 tl_assert(isShadowAtom(mce,a2));
755 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
756 }
757
758 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
759 tl_assert(isShadowAtom(mce,a1));
760 tl_assert(isShadowAtom(mce,a2));
761 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
762 }
763
764 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
765 tl_assert(isShadowAtom(mce,a1));
766 tl_assert(isShadowAtom(mce,a2));
767 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
768 }
769
770 /* --------- Undefined-if-either-undefined --------- */
771
772 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
773 tl_assert(isShadowAtom(mce,a1));
774 tl_assert(isShadowAtom(mce,a2));
775 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
776 }
777
778 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
779 tl_assert(isShadowAtom(mce,a1));
780 tl_assert(isShadowAtom(mce,a2));
781 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
782 }
783
784 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
785 tl_assert(isShadowAtom(mce,a1));
786 tl_assert(isShadowAtom(mce,a2));
787 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
788 }
789
790 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
791 tl_assert(isShadowAtom(mce,a1));
792 tl_assert(isShadowAtom(mce,a2));
793 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
794 }
795
796 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
797 tl_assert(isShadowAtom(mce,a1));
798 tl_assert(isShadowAtom(mce,a2));
799 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
800 }
801
802 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
803 switch (vty) {
804 case Ity_I8: return mkUifU8(mce, a1, a2);
805 case Ity_I16: return mkUifU16(mce, a1, a2);
806 case Ity_I32: return mkUifU32(mce, a1, a2);
807 case Ity_I64: return mkUifU64(mce, a1, a2);
808 case Ity_V128: return mkUifUV128(mce, a1, a2);
809 default:
810 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
811 VG_(tool_panic)("memcheck:mkUifU");
812 }
813 }
814
815 /* --------- The Left-family of operations. --------- */
816
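/* The "Left" operation computes a1 | -a1, which keeps the lowest set
   (undefined) bit of a1 and forces every more significant bit to 1 as
   well.  In other words, once some bit position is undefined, all
   higher result bits are treated as undefined too -- a cheap
   approximation to carry propagation in add/sub-like operations. */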
817 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
818 tl_assert(isShadowAtom(mce,a1));
819 /* It's safe to duplicate a1 since it's only an atom */
820 return assignNew(mce, Ity_I8,
821 binop(Iop_Or8, a1,
822 assignNew(mce, Ity_I8,
823 /* unop(Iop_Neg8, a1)))); */
824 binop(Iop_Sub8, mkU8(0), a1) )));
825 }
826
827 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
828 tl_assert(isShadowAtom(mce,a1));
829 /* It's safe to duplicate a1 since it's only an atom */
830 return assignNew(mce, Ity_I16,
831 binop(Iop_Or16, a1,
832 assignNew(mce, Ity_I16,
833 /* unop(Iop_Neg16, a1)))); */
834 binop(Iop_Sub16, mkU16(0), a1) )));
835 }
836
837 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
838 tl_assert(isShadowAtom(mce,a1));
839 /* It's safe to duplicate a1 since it's only an atom */
840 return assignNew(mce, Ity_I32,
841 binop(Iop_Or32, a1,
842 assignNew(mce, Ity_I32,
843 /* unop(Iop_Neg32, a1)))); */
844 binop(Iop_Sub32, mkU32(0), a1) )));
845 }
846
847 /* --------- 'Improvement' functions for AND/OR. --------- */
848
849 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
850 defined (0); all other -> undefined (1).
851 */
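/* A small worked example (8 bits, illustrative only): if data is 0x0F
   with fully defined vbits 0x00, ImproveAND8 yields 0x0F.  The zero
   bits of the improvement term mark positions where data is a defined
   0, and 0 & anything == 0, so those result bits can later be forced
   to 'defined' regardless of the other operand's vbits. */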
852 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
853 {
854 tl_assert(isOriginalAtom(mce, data));
855 tl_assert(isShadowAtom(mce, vbits));
856 tl_assert(sameKindedAtoms(data, vbits));
857 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
858 }
859
860 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
861 {
862 tl_assert(isOriginalAtom(mce, data));
863 tl_assert(isShadowAtom(mce, vbits));
864 tl_assert(sameKindedAtoms(data, vbits));
865 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
866 }
867
868 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
869 {
870 tl_assert(isOriginalAtom(mce, data));
871 tl_assert(isShadowAtom(mce, vbits));
872 tl_assert(sameKindedAtoms(data, vbits));
873 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
874 }
875
876 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
877 {
878 tl_assert(isOriginalAtom(mce, data));
879 tl_assert(isShadowAtom(mce, vbits));
880 tl_assert(sameKindedAtoms(data, vbits));
881 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
882 }
883
884 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
885 {
886 tl_assert(isOriginalAtom(mce, data));
887 tl_assert(isShadowAtom(mce, vbits));
888 tl_assert(sameKindedAtoms(data, vbits));
889 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
890 }
891
892 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
893 defined (0); all other -> undefined (1).
894 */
895 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
896 {
897 tl_assert(isOriginalAtom(mce, data));
898 tl_assert(isShadowAtom(mce, vbits));
899 tl_assert(sameKindedAtoms(data, vbits));
900 return assignNew(
901 mce, Ity_I8,
902 binop(Iop_Or8,
903 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
904 vbits) );
905 }
906
907 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
908 {
909 tl_assert(isOriginalAtom(mce, data));
910 tl_assert(isShadowAtom(mce, vbits));
911 tl_assert(sameKindedAtoms(data, vbits));
912 return assignNew(
913 mce, Ity_I16,
914 binop(Iop_Or16,
915 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
916 vbits) );
917 }
918
919 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
920 {
921 tl_assert(isOriginalAtom(mce, data));
922 tl_assert(isShadowAtom(mce, vbits));
923 tl_assert(sameKindedAtoms(data, vbits));
924 return assignNew(
925 mce, Ity_I32,
926 binop(Iop_Or32,
927 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
928 vbits) );
929 }
930
931 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
932 {
933 tl_assert(isOriginalAtom(mce, data));
934 tl_assert(isShadowAtom(mce, vbits));
935 tl_assert(sameKindedAtoms(data, vbits));
936 return assignNew(
937 mce, Ity_I64,
938 binop(Iop_Or64,
939 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
940 vbits) );
941 }
942
943 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
944 {
945 tl_assert(isOriginalAtom(mce, data));
946 tl_assert(isShadowAtom(mce, vbits));
947 tl_assert(sameKindedAtoms(data, vbits));
948 return assignNew(
949 mce, Ity_V128,
950 binop(Iop_OrV128,
951 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
952 vbits) );
953 }
954
955 /* --------- Pessimising casts. --------- */
956
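/* A pessimising cast collapses a vbits value of any width down to a
   single "is anything undefined?" bit and then smears that bit across
   the destination width.  For example (illustrative only), with
   dst_ty == Ity_I32: an input of 0x00000000 (fully defined) yields
   0x00000000, while any nonzero input (at least one undefined bit)
   yields 0xFFFFFFFF. */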
957 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
958 {
959 IRType ty;
960 IRAtom* tmp1;
961 /* Note, dst_ty is a shadow type, not an original type. */
962 /* First of all, collapse vbits down to a single bit. */
963 tl_assert(isShadowAtom(mce,vbits));
964 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
965 tmp1 = NULL;
966 switch (ty) {
967 case Ity_I1:
968 tmp1 = vbits;
969 break;
970 case Ity_I8:
971 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
972 break;
973 case Ity_I16:
974 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
975 break;
976 case Ity_I32:
977 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
978 break;
979 case Ity_I64:
980 tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
981 break;
982 default:
983 VG_(tool_panic)("mkPCastTo(1)");
984 }
985 tl_assert(tmp1);
986 /* Now widen up to the dst type. */
987 switch (dst_ty) {
988 case Ity_I1:
989 return tmp1;
990 case Ity_I8:
991 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
992 case Ity_I16:
993 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
994 case Ity_I32:
995 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
996 case Ity_I64:
997 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
998 case Ity_V128:
999 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1000 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
1001 return tmp1;
1002 default:
1003 ppIRType(dst_ty);
1004 VG_(tool_panic)("mkPCastTo(2)");
1005 }
1006 }
1007
1008
1009 /*------------------------------------------------------------*/
1010 /*--- Emit a test and complaint if something is undefined. ---*/
1011 /*------------------------------------------------------------*/
1012
1013 /* Set the annotations on a dirty helper to indicate that the stack
1014 pointer and instruction pointers might be read. This is the
1015 behaviour of all 'emit-a-complaint' style functions we might
1016 call. */
1017
1018 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1019 di->nFxState = 2;
1020 di->fxState[0].fx = Ifx_Read;
1021 di->fxState[0].offset = mce->layout->offset_SP;
1022 di->fxState[0].size = mce->layout->sizeof_SP;
1023 di->fxState[1].fx = Ifx_Read;
1024 di->fxState[1].offset = mce->layout->offset_IP;
1025 di->fxState[1].size = mce->layout->sizeof_IP;
1026 }
1027
1028
1029 /* Check the supplied **original** atom for undefinedness, and emit a
1030 complaint if so. Once that happens, mark it as defined. This is
1031 possible because the atom is either a tmp or literal. If it's a
1032 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1033 be defined. In fact as mentioned above, we will have to allocate a
1034 new tmp to carry the new 'defined' shadow value, and update the
1035 original->tmp mapping accordingly; we cannot simply assign a new
1036 value to an existing shadow tmp as this breaks SSAness -- resulting
1037 in the post-instrumentation sanity checker spluttering in disapproval.
1038 */
1039 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1040 {
1041 IRAtom* vatom;
1042 IRType ty;
1043 Int sz;
1044 IRDirty* di;
1045 IRAtom* cond;
1046
1047 /* Since the original expression is atomic, there's no duplicated
1048 work generated by making multiple V-expressions for it. So we
1049 don't really care about the possibility that someone else may
1050 also create a V-interpretation for it. */
1051 tl_assert(isOriginalAtom(mce, atom));
1052 vatom = expr2vbits( mce, atom );
1053 tl_assert(isShadowAtom(mce, vatom));
1054 tl_assert(sameKindedAtoms(atom, vatom));
1055
1056 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1057
1058 /* sz is only used for constructing the error message */
1059 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1060
1061 cond = mkPCastTo( mce, Ity_I1, vatom );
1062 /* cond will be 0 if all defined, and 1 if any not defined. */
1063
1064 switch (sz) {
1065 case 0:
1066 di = unsafeIRDirty_0_N( 0/*regparms*/,
1067 "MC_(helperc_value_check0_fail)",
1068 &MC_(helperc_value_check0_fail),
1069 mkIRExprVec_0()
1070 );
1071 break;
1072 case 1:
1073 di = unsafeIRDirty_0_N( 0/*regparms*/,
1074 "MC_(helperc_value_check1_fail)",
1075 &MC_(helperc_value_check1_fail),
1076 mkIRExprVec_0()
1077 );
1078 break;
1079 case 4:
1080 di = unsafeIRDirty_0_N( 0/*regparms*/,
1081 "MC_(helperc_value_check4_fail)",
1082 &MC_(helperc_value_check4_fail),
1083 mkIRExprVec_0()
1084 );
1085 break;
1086 default:
1087 di = unsafeIRDirty_0_N( 1/*regparms*/,
1088 "MC_(helperc_complain_undef)",
1089 &MC_(helperc_complain_undef),
1090 mkIRExprVec_1( mkIRExpr_HWord( sz ))
1091 );
1092 break;
1093 }
1094 di->guard = cond;
1095 setHelperAnns( mce, di );
1096 stmt( mce->bb, IRStmt_Dirty(di));
1097
1098 /* Set the shadow tmp to be defined. First, update the
1099 orig->shadow tmp mapping to reflect the fact that this shadow is
1100 getting a new value. */
1101 tl_assert(isIRAtom(vatom));
1102 /* sameKindedAtoms ... */
1103 if (vatom->tag == Iex_RdTmp) {
1104 tl_assert(atom->tag == Iex_RdTmp);
1105 newShadowTmp(mce, atom->Iex.RdTmp.tmp);
1106 assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
1107 definedOfType(ty));
1108 }
1109 }
1110
1111
1112 /*------------------------------------------------------------*/
1113 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1114 /*------------------------------------------------------------*/
1115
1116 /* Examine the always-defined sections declared in layout to see if
1117 the (offset,size) section is within one. Note, it is an error to
1118 partially fall into such a region: (offset,size) should either be
1119 completely in such a region or completely not-in such a region.
1120 */
1121 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1122 {
1123 Int minoffD, maxoffD, i;
1124 Int minoff = offset;
1125 Int maxoff = minoff + size - 1;
1126 tl_assert((minoff & ~0xFFFF) == 0);
1127 tl_assert((maxoff & ~0xFFFF) == 0);
1128
1129 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1130 minoffD = mce->layout->alwaysDefd[i].offset;
1131 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1132 tl_assert((minoffD & ~0xFFFF) == 0);
1133 tl_assert((maxoffD & ~0xFFFF) == 0);
1134
1135 if (maxoff < minoffD || maxoffD < minoff)
1136 continue; /* no overlap */
1137 if (minoff >= minoffD && maxoff <= maxoffD)
1138 return True; /* completely contained in an always-defd section */
1139
1140 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1141 }
1142 return False; /* could not find any containing section */
1143 }
1144
1145
1146 /* Generate into bb suitable actions to shadow this Put. If the state
1147 slice is marked 'always defined', do nothing. Otherwise, write the
1148 supplied V bits to the shadow state. We can pass in either an
1149 original atom or a V-atom, but not both. In the former case the
1150 relevant V-bits are then generated from the original.
1151 */
1152 static
1153 void do_shadow_PUT ( MCEnv* mce, Int offset,
1154 IRAtom* atom, IRAtom* vatom )
1155 {
1156 IRType ty;
1157 if (atom) {
1158 tl_assert(!vatom);
1159 tl_assert(isOriginalAtom(mce, atom));
1160 vatom = expr2vbits( mce, atom );
1161 } else {
1162 tl_assert(vatom);
1163 tl_assert(isShadowAtom(mce, vatom));
1164 }
1165
1166 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
1167 tl_assert(ty != Ity_I1);
1168 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1169 /* later: no ... */
1170 /* emit code to emit a complaint if any of the vbits are 1. */
1171 /* complainIfUndefined(mce, atom); */
1172 } else {
1173 /* Do a plain shadow Put. */
1174 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1175 }
1176 }
1177
1178
1179 /* Generate into bb suitable actions to shadow this PutI. If the state
1180 slice is marked 'always defined', do nothing; otherwise write the V
1181 bits for the stored value into the corresponding shadow state array. */
1182 static
1183 void do_shadow_PUTI ( MCEnv* mce,
1184 IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
1185 {
1186 IRAtom* vatom;
1187 IRType ty, tyS;
1188 Int arrSize;
1189
1190 tl_assert(isOriginalAtom(mce,atom));
1191 vatom = expr2vbits( mce, atom );
1192 tl_assert(sameKindedAtoms(atom, vatom));
1193 ty = descr->elemTy;
1194 tyS = shadowType(ty);
1195 arrSize = descr->nElems * sizeofIRType(ty);
1196 tl_assert(ty != Ity_I1);
1197 tl_assert(isOriginalAtom(mce,ix));
1198 complainIfUndefined(mce,ix);
1199 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1200 /* later: no ... */
1201 /* emit code to emit a complaint if any of the vbits are 1. */
1202 /* complainIfUndefined(mce, atom); */
1203 } else {
1204 /* Do a cloned version of the Put that refers to the shadow
1205 area. */
1206 IRRegArray* new_descr
1207 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1208 tyS, descr->nElems);
1209 stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
1210 }
1211 }
1212
1213
1214 /* Return an expression which contains the V bits corresponding to the
1215 given GET (passed in in pieces).
1216 */
1217 static
1218 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1219 {
1220 IRType tyS = shadowType(ty);
1221 tl_assert(ty != Ity_I1);
1222 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1223 /* Always defined, return all zeroes of the relevant type */
1224 return definedOfType(tyS);
1225 } else {
1226 /* return a cloned version of the Get that refers to the shadow
1227 area. */
1228 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1229 }
1230 }
1231
1232
1233 /* Return an expression which contains the V bits corresponding to the
1234 given GETI (passed in in pieces).
1235 */
1236 static
1237 IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
1238 {
1239 IRType ty = descr->elemTy;
1240 IRType tyS = shadowType(ty);
1241 Int arrSize = descr->nElems * sizeofIRType(ty);
1242 tl_assert(ty != Ity_I1);
1243 tl_assert(isOriginalAtom(mce,ix));
1244 complainIfUndefined(mce,ix);
1245 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1246 /* Always defined, return all zeroes of the relevant type */
1247 return definedOfType(tyS);
1248 } else {
1249 /* return a cloned version of the Get that refers to the shadow
1250 area. */
1251 IRRegArray* new_descr
1252 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1253 tyS, descr->nElems);
1254 return IRExpr_GetI( new_descr, ix, bias );
1255 }
1256 }
1257
1258
1259 /*------------------------------------------------------------*/
1260 /*--- Generating approximations for unknown operations, ---*/
1261 /*--- using lazy-propagate semantics ---*/
1262 /*------------------------------------------------------------*/
1263
1264 /* Lazy propagation of undefinedness from two values, resulting in the
1265 specified shadow type.
1266 */
1267 static
1268 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1269 {
1270 /* force everything via 32-bit intermediaries. */
1271 IRAtom* at;
1272 tl_assert(isShadowAtom(mce,va1));
1273 tl_assert(isShadowAtom(mce,va2));
1274 at = mkPCastTo(mce, Ity_I32, va1);
1275 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1276 at = mkPCastTo(mce, finalVty, at);
1277 return at;
1278 }
1279
1280
1281 /* Do the lazy propagation game from a null-terminated vector of
1282 atoms. This is presumably the arguments to a helper call, so the
1283 IRCallee info is also supplied in order that we can know which
1284 arguments should be ignored (via the .mcx_mask field).
1285 */
1286 static
1287 IRAtom* mkLazyN ( MCEnv* mce,
1288 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1289 {
1290 Int i;
1291 IRAtom* here;
1292 IRAtom* curr = definedOfType(Ity_I32);
1293 for (i = 0; exprvec[i]; i++) {
1294 tl_assert(i < 32);
1295 tl_assert(isOriginalAtom(mce, exprvec[i]));
1296 /* Only take notice of this arg if the callee's mc-exclusion
1297 mask does not say it is to be excluded. */
1298 if (cee->mcx_mask & (1<<i)) {
1299 /* the arg is to be excluded from definedness checking. Do
1300 nothing. */
1301 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1302 } else {
1303 /* calculate the arg's definedness, and pessimistically merge
1304 it in. */
1305 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
1306 curr = mkUifU32(mce, here, curr);
1307 }
1308 }
1309 return mkPCastTo(mce, finalVtype, curr );
1310 }
1311
1312
1313 /*------------------------------------------------------------*/
1314 /*--- Generating expensive sequences for exact carry-chain ---*/
1315 /*--- propagation in add/sub and related operations. ---*/
1316 /*------------------------------------------------------------*/
1317
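/* Sketch of the idea behind expensiveAdd32 below: from the operands
   and their vbits, form the smallest possible sum (a_min + b_min) and
   the largest possible sum (a_max + b_max) consistent with the
   undefined bits.  Any result bit that differs between the two sums
   could be influenced by an undefined input, so it is marked
   undefined, in addition to bits that are directly undefined in
   either operand. */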
1318 static
1319 __attribute__((unused))
1320 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
1321 IRAtom* aa, IRAtom* bb )
1322 {
1323 IRAtom *a_min, *b_min, *a_max, *b_max;
1324 IRType ty;
1325 IROp opAND, opOR, opXOR, opNOT, opADD;
1326
1327 tl_assert(isShadowAtom(mce,qaa));
1328 tl_assert(isShadowAtom(mce,qbb));
1329 tl_assert(isOriginalAtom(mce,aa));
1330 tl_assert(isOriginalAtom(mce,bb));
1331 tl_assert(sameKindedAtoms(qaa,aa));
1332 tl_assert(sameKindedAtoms(qbb,bb));
1333
1334 ty = Ity_I32;
1335 opAND = Iop_And32;
1336 opOR = Iop_Or32;
1337 opXOR = Iop_Xor32;
1338 opNOT = Iop_Not32;
1339 opADD = Iop_Add32;
1340
1341 // a_min = aa & ~qaa
1342 a_min = assignNew(mce,ty,
1343 binop(opAND, aa,
1344 assignNew(mce,ty, unop(opNOT, qaa))));
1345
1346 // b_min = bb & ~qbb
1347 b_min = assignNew(mce,ty,
1348 binop(opAND, bb,
1349 assignNew(mce,ty, unop(opNOT, qbb))));
1350
1351 // a_max = aa | qaa
1352 a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1353
1354 // b_max = bb | qbb
1355 b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1356
1357 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1358 return
1359 assignNew(mce,ty,
1360 binop( opOR,
1361 assignNew(mce,ty, binop(opOR, qaa, qbb)),
1362 assignNew(mce,ty,
1363 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
1364 assignNew(mce,ty, binop(opADD, a_max, b_max))
1365 )
1366 )
1367 )
1368 );
1369 }
1370
1371
1372 /*------------------------------------------------------------*/
1373 /*--- Helpers for dealing with vector primops. ---*/
1374 /*------------------------------------------------------------*/
1375
1376 /* Vector pessimisation -- pessimise within each lane individually. */
1377
1378 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1379 {
1380 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1381 }
1382
1383 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1384 {
1385 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1386 }
1387
1388 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1389 {
1390 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1391 }
1392
1393 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1394 {
1395 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1396 }
1397
1398
1399 /* Here's a simple scheme capable of handling ops derived from SSE1
1400 code, while only generating ops that can be efficiently
1401 implemented in SSE1. */
1402
1403 /* All-lanes versions are straightforward:
1404
1405 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
1406
1407 unary32Fx4(x,y) ==> PCast32x4(x#)
1408
1409 Lowest-lane-only versions are more complex:
1410
1411 binary32F0x4(x,y) ==> SetV128lo32(
1412 x#,
1413 PCast32(V128to32(UifUV128(x#,y#)))
1414 )
1415
1416 This is perhaps not so obvious. In particular, it's faster to
1417 do a V128-bit UifU and then take the bottom 32 bits than the more
1418 obvious scheme of taking the bottom 32 bits of each operand
1419 and doing a 32-bit UifU. Basically since UifU is fast and
1420 chopping lanes off vector values is slow.
1421
1422 Finally:
1423
1424 unary32F0x4(x) ==> SetV128lo32(
1425 x#,
1426 PCast32(V128to32(x#))
1427 )
1428
1429 Where:
1430
1431 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1432 PCast32x4(v#) = CmpNEZ32x4(v#)
1433 */
1434
1435 static
1436 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1437 {
1438 IRAtom* at;
1439 tl_assert(isShadowAtom(mce, vatomX));
1440 tl_assert(isShadowAtom(mce, vatomY));
1441 at = mkUifUV128(mce, vatomX, vatomY);
1442 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1443 return at;
1444 }
1445
1446 static
1447 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1448 {
1449 IRAtom* at;
1450 tl_assert(isShadowAtom(mce, vatomX));
1451 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1452 return at;
1453 }
1454
1455 static
1456 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1457 {
1458 IRAtom* at;
1459 tl_assert(isShadowAtom(mce, vatomX));
1460 tl_assert(isShadowAtom(mce, vatomY));
1461 at = mkUifUV128(mce, vatomX, vatomY);
1462 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1463 at = mkPCastTo(mce, Ity_I32, at);
1464 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1465 return at;
1466 }
1467
1468 static
1469 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1470 {
1471 IRAtom* at;
1472 tl_assert(isShadowAtom(mce, vatomX));
1473 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1474 at = mkPCastTo(mce, Ity_I32, at);
1475 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1476 return at;
1477 }
1478
1479 /* --- ... and ... 64Fx2 versions of the same ... --- */
1480
1481 static
1482 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1483 {
1484 IRAtom* at;
1485 tl_assert(isShadowAtom(mce, vatomX));
1486 tl_assert(isShadowAtom(mce, vatomY));
1487 at = mkUifUV128(mce, vatomX, vatomY);
1488 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1489 return at;
1490 }
1491
1492 static
1493 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1494 {
1495 IRAtom* at;
1496 tl_assert(isShadowAtom(mce, vatomX));
1497 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1498 return at;
1499 }
1500
1501 static
1502 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1503 {
1504 IRAtom* at;
1505 tl_assert(isShadowAtom(mce, vatomX));
1506 tl_assert(isShadowAtom(mce, vatomY));
1507 at = mkUifUV128(mce, vatomX, vatomY);
1508 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1509 at = mkPCastTo(mce, Ity_I64, at);
1510 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1511 return at;
1512 }
1513
1514 static
1515 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1516 {
1517 IRAtom* at;
1518 tl_assert(isShadowAtom(mce, vatomX));
1519 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1520 at = mkPCastTo(mce, Ity_I64, at);
1521 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1522 return at;
1523 }
1524
1525 /* --- --- Vector saturated narrowing --- --- */
1526
1527 /* This is quite subtle. What to do is simple:
1528
1529 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1530
1531 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1532
1533 Why this is right is not so simple. Consider a lane in the args,
1534 vatom1 or 2, doesn't matter.
1535
1536 After the PCast, that lane is all 0s (defined) or all
1537 1s(undefined).
1538
1539 Both signed and unsigned saturating narrowing of all 0s produces
1540 all 0s, which is what we want.
1541
1542 The all-1s case is more complex. Unsigned narrowing interprets an
1543 all-1s input as the largest unsigned integer, and so produces all
1544 1s as a result since that is the largest unsigned value at the
1545 smaller width.
1546
1547 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1548 to -1, so we still wind up with all 1s at the smaller width.
1549
1550 So: In short, pessimise the args, then apply the original narrowing
1551 op.
1552 */
1553 static
1554 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1555 IRAtom* vatom1, IRAtom* vatom2)
1556 {
1557 IRAtom *at1, *at2, *at3;
1558 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1559 switch (narrow_op) {
1560 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1561 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
1562 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
1563 default: VG_(tool_panic)("vectorNarrowV128");
1564 }
1565 tl_assert(isShadowAtom(mce,vatom1));
1566 tl_assert(isShadowAtom(mce,vatom2));
1567 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1568 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1569 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1570 return at3;
1571 }
1572
1573
1574 /* --- --- Vector integer arithmetic --- --- */
1575
1576 /* Simple ... UifU the args and per-lane pessimise the results. */
1577 static
1578 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1579 {
1580 IRAtom* at;
1581 at = mkUifUV128(mce, vatom1, vatom2);
1582 at = mkPCast8x16(mce, at);
1583 return at;
1584 }
1585
1586 static
1587 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1588 {
1589 IRAtom* at;
1590 at = mkUifUV128(mce, vatom1, vatom2);
1591 at = mkPCast16x8(mce, at);
1592 return at;
1593 }
1594
1595 static
1596 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1597 {
1598 IRAtom* at;
1599 at = mkUifUV128(mce, vatom1, vatom2);
1600 at = mkPCast32x4(mce, at);
1601 return at;
1602 }
1603
1604 static
1605 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1606 {
1607 IRAtom* at;
1608 at = mkUifUV128(mce, vatom1, vatom2);
1609 at = mkPCast64x2(mce, at);
1610 return at;
1611 }
1612
1613
1614 /*------------------------------------------------------------*/
1615 /*--- Generate shadow values from all kinds of IRExprs. ---*/
1616 /*------------------------------------------------------------*/
1617
1618 static
1619 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1620 IROp op,
1621 IRAtom* atom1, IRAtom* atom2 )
1622 {
1623 IRType and_or_ty;
1624 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1625 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1626 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1627
1628 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1629 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1630
1631 tl_assert(isOriginalAtom(mce,atom1));
1632 tl_assert(isOriginalAtom(mce,atom2));
1633 tl_assert(isShadowAtom(mce,vatom1));
1634 tl_assert(isShadowAtom(mce,vatom2));
1635 tl_assert(sameKindedAtoms(atom1,vatom1));
1636 tl_assert(sameKindedAtoms(atom2,vatom2));
1637 switch (op) {
1638
1639 /* V128-bit SIMD (SSE2-esque) */
1640
1641 case Iop_ShrN16x8:
1642 case Iop_ShrN32x4:
1643 case Iop_ShrN64x2:
1644 case Iop_SarN16x8:
1645 case Iop_SarN32x4:
1646 case Iop_ShlN16x8:
1647 case Iop_ShlN32x4:
1648 case Iop_ShlN64x2:
1649 /* Same scheme as with all other shifts. */
1650 complainIfUndefined(mce, atom2);
1651 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1652
1653 case Iop_QSub8Ux16:
1654 case Iop_QSub8Sx16:
1655 case Iop_Sub8x16:
1656 case Iop_Min8Ux16:
1657 case Iop_Max8Ux16:
1658 case Iop_CmpGT8Sx16:
1659 case Iop_CmpEQ8x16:
1660 case Iop_Avg8Ux16:
1661 case Iop_QAdd8Ux16:
1662 case Iop_QAdd8Sx16:
1663 case Iop_Add8x16:
1664 return binary8Ix16(mce, vatom1, vatom2);
1665
1666 case Iop_QSub16Ux8:
1667 case Iop_QSub16Sx8:
1668 case Iop_Sub16x8:
1669 case Iop_Mul16x8:
1670 case Iop_MulHi16Sx8:
1671 case Iop_MulHi16Ux8:
1672 case Iop_Min16Sx8:
1673 case Iop_Max16Sx8:
1674 case Iop_CmpGT16Sx8:
1675 case Iop_CmpEQ16x8:
1676 case Iop_Avg16Ux8:
1677 case Iop_QAdd16Ux8:
1678 case Iop_QAdd16Sx8:
1679 case Iop_Add16x8:
1680 return binary16Ix8(mce, vatom1, vatom2);
1681
1682 case Iop_Sub32x4:
1683 case Iop_QSub32Sx4:
1684 case Iop_QSub32Ux4:
1685 case Iop_CmpGT32Sx4:
1686 case Iop_CmpEQ32x4:
1687 case Iop_Add32x4:
1688 case Iop_QAdd32Ux4:
1689 case Iop_QAdd32Sx4:
1690 return binary32Ix4(mce, vatom1, vatom2);
1691
1692 case Iop_Sub64x2:
1693 case Iop_QSub64Ux2:
1694 case Iop_QSub64Sx2:
1695 case Iop_Add64x2:
1696 case Iop_QAdd64Ux2:
1697 case Iop_QAdd64Sx2:
1698 return binary64Ix2(mce, vatom1, vatom2);
1699
1700 case Iop_QNarrowBin32Sto16Sx8:
1701 case Iop_QNarrowBin16Sto8Sx16:
1702 case Iop_QNarrowBin16Sto8Ux16:
1703 return vectorNarrowV128(mce, op, vatom1, vatom2);
1704
1705 case Iop_Sub64Fx2:
1706 case Iop_Mul64Fx2:
1707 case Iop_Min64Fx2:
1708 case Iop_Max64Fx2:
1709 case Iop_Div64Fx2:
1710 case Iop_CmpLT64Fx2:
1711 case Iop_CmpLE64Fx2:
1712 case Iop_CmpEQ64Fx2:
1713 case Iop_Add64Fx2:
1714 return binary64Fx2(mce, vatom1, vatom2);
1715
1716 case Iop_Sub64F0x2:
1717 case Iop_Mul64F0x2:
1718 case Iop_Min64F0x2:
1719 case Iop_Max64F0x2:
1720 case Iop_Div64F0x2:
1721 case Iop_CmpLT64F0x2:
1722 case Iop_CmpLE64F0x2:
1723 case Iop_CmpEQ64F0x2:
1724 case Iop_Add64F0x2:
1725 return binary64F0x2(mce, vatom1, vatom2);
1726
1727 /* V128-bit SIMD (SSE1-esque) */
1728
1729 case Iop_Sub32Fx4:
1730 case Iop_Mul32Fx4:
1731 case Iop_Min32Fx4:
1732 case Iop_Max32Fx4:
1733 case Iop_Div32Fx4:
1734 case Iop_CmpLT32Fx4:
1735 case Iop_CmpLE32Fx4:
1736 case Iop_CmpEQ32Fx4:
1737 case Iop_Add32Fx4:
1738 return binary32Fx4(mce, vatom1, vatom2);
1739
1740 case Iop_Sub32F0x4:
1741 case Iop_Mul32F0x4:
1742 case Iop_Min32F0x4:
1743 case Iop_Max32F0x4:
1744 case Iop_Div32F0x4:
1745 case Iop_CmpLT32F0x4:
1746 case Iop_CmpLE32F0x4:
1747 case Iop_CmpEQ32F0x4:
1748 case Iop_Add32F0x4:
1749 return binary32F0x4(mce, vatom1, vatom2);
1750
1751 /* V128-bit data-steering */
1752 case Iop_SetV128lo32:
1753 case Iop_SetV128lo64:
1754 case Iop_64HLtoV128:
1755 case Iop_InterleaveLO64x2:
1756 case Iop_InterleaveLO32x4:
1757 case Iop_InterleaveLO16x8:
1758 case Iop_InterleaveLO8x16:
1759 case Iop_InterleaveHI64x2:
1760 case Iop_InterleaveHI32x4:
1761 case Iop_InterleaveHI16x8:
1762 case Iop_InterleaveHI8x16:
1763 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1764
1765 /* Scalar floating point */
1766
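      /* The scalar FP cases below (and the DivMod cases further down)
         all use mkLazy2.  Assuming its usual definition earlier in
         this file, it PCasts both shadow args, UifUs them together,
         and PCasts the result to the stated type, so any undefined
         input bit makes the entire result undefined. */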
1767 // case Iop_RoundF64:
1768 case Iop_F64toI64S:
1769 case Iop_I64StoF64:
1770 /* First arg is I32 (rounding mode), second is F64 or I64
1771 (data). */
1772 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1773
1774 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1775 /* Takes two F64 args. */
1776 case Iop_F64toI32S:
1777 case Iop_F64toF32:
1778 /* First arg is I32 (rounding mode), second is F64 (data). */
1779 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1780
1781 case Iop_F64toI16S:
1782 /* First arg is I32 (rounding mode), second is F64 (data). */
1783 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1784
1785 case Iop_ScaleF64:
1786 case Iop_Yl2xF64:
1787 case Iop_Yl2xp1F64:
1788 case Iop_PRemF64:
1789 case Iop_AtanF64:
1790 case Iop_AddF64:
1791 case Iop_DivF64:
1792 case Iop_SubF64:
1793 case Iop_MulF64:
1794 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1795
1796 case Iop_CmpF64:
1797 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1798
1799 /* non-FP after here */
1800
1801 case Iop_DivModU64to32:
1802 case Iop_DivModS64to32:
1803 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1804
1805 case Iop_16HLto32:
1806 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1807 case Iop_32HLto64:
1808 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1809
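      /* Widening multiplies: form the low half as Left(UifU(v1,v2)),
         then PCast that into the high half, so undefinedness in
         either argument taints the whole double-width result. */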
1810 case Iop_MullS32:
1811 case Iop_MullU32: {
1812 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1813 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1814 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1815 }
1816
1817 case Iop_MullS16:
1818 case Iop_MullU16: {
1819 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1820 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1821 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1822 }
1823
1824 case Iop_MullS8:
1825 case Iop_MullU8: {
1826 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1827 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1828 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1829 }
1830
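      /* Integer add/sub/mul: UifU the shadow args, then apply 'Left'.
         Left smears any undefined bit towards the MSB (assuming the
         usual Memcheck definition, roughly x | -x), reflecting that a
         carry out of an undefined bit position can taint all
         higher-order result bits but never lower-order ones. */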
1831 case Iop_Add32:
1832 # if 0
1833 return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1834 # endif
1835 case Iop_Sub32:
1836 case Iop_Mul32:
1837 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1838
1839 case Iop_Mul16:
1840 case Iop_Add16:
1841 case Iop_Sub16:
1842 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1843
1844 case Iop_Sub8:
1845 case Iop_Add8:
1846 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1847
1848 case Iop_CmpLE32S: case Iop_CmpLE32U:
1849 case Iop_CmpLT32U: case Iop_CmpLT32S:
1850 case Iop_CmpEQ32: case Iop_CmpNE32:
1851 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1852
1853 case Iop_CmpEQ16: case Iop_CmpNE16:
1854 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1855
1856 case Iop_CmpEQ8: case Iop_CmpNE8:
1857 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1858
1859 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1860 /* Complain if the shift amount is undefined. Then simply
1861 shift the first arg's V bits by the real shift amount. */
1862 complainIfUndefined(mce, atom2);
1863 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1864
1865 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1866 /* Same scheme as with 32-bit shifts. */
1867 complainIfUndefined(mce, atom2);
1868 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1869
1870 case Iop_Shl8: case Iop_Shr8:
1871 /* Same scheme as with 32-bit shifts. */
1872 complainIfUndefined(mce, atom2);
1873 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1874
1875 case Iop_Shl64: case Iop_Shr64:
1876 /* Same scheme as with 32-bit shifts. */
1877 complainIfUndefined(mce, atom2);
1878 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1879
1880 case Iop_AndV128:
1881 uifu = mkUifUV128; difd = mkDifDV128;
1882 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1883 case Iop_And64:
1884 uifu = mkUifU64; difd = mkDifD64;
1885 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1886 case Iop_And32:
1887 uifu = mkUifU32; difd = mkDifD32;
1888 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1889 case Iop_And16:
1890 uifu = mkUifU16; difd = mkDifD16;
1891 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1892 case Iop_And8:
1893 uifu = mkUifU8; difd = mkDifD8;
1894 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1895
1896 case Iop_OrV128:
1897 uifu = mkUifUV128; difd = mkDifDV128;
1898 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1899 case Iop_Or64:
1900 uifu = mkUifU64; difd = mkDifD64;
1901 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1902 case Iop_Or32:
1903 uifu = mkUifU32; difd = mkDifD32;
1904 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1905 case Iop_Or16:
1906 uifu = mkUifU16; difd = mkDifD16;
1907 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1908 case Iop_Or8:
1909 uifu = mkUifU8; difd = mkDifD8;
1910 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1911
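      /* For And/Or we can do better than plain UifU: a *defined* 0
         (for And) or a defined 1 (for Or) in either argument forces
         the corresponding result bit regardless of the other operand.
         So: start from UifU(v1,v2), then DifD in the two 'improvement'
         terms, which (assuming the usual semantics of mkImproveAND* /
         mkImproveOR* defined earlier in this file) mark exactly those
         forced bits as defined. */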
1912 do_And_Or:
1913 return
1914 assignNew(
1915 mce,
1916 and_or_ty,
1917 difd(mce, uifu(mce, vatom1, vatom2),
1918 difd(mce, improve(mce, atom1, vatom1),
1919 improve(mce, atom2, vatom2) ) ) );
1920
1921 case Iop_Xor8:
1922 return mkUifU8(mce, vatom1, vatom2);
1923 case Iop_Xor16:
1924 return mkUifU16(mce, vatom1, vatom2);
1925 case Iop_Xor32:
1926 return mkUifU32(mce, vatom1, vatom2);
1927 case Iop_Xor64:
1928 return mkUifU64(mce, vatom1, vatom2);
1929 case Iop_XorV128:
1930 return mkUifUV128(mce, vatom1, vatom2);
1931
1932 default:
1933 ppIROp(op);
1934 VG_(tool_panic)("memcheck:expr2vbits_Binop");
1935 }
1936 }
1937
1938
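/* Compute the shadow value for a unary operation.  Bit-for-bit ops
   (Not*, Reinterp*) pass the shadow through unchanged; integer and
   vector size-changing ops apply the same op to the shadow; FP
   conversions and transcendentals pessimistically PCast the whole
   shadow to the result type. */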
1939 static
1940 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1941 {
1942 IRAtom* vatom = expr2vbits( mce, atom );
1943 tl_assert(isOriginalAtom(mce,atom));
1944 switch (op) {
1945
1946 case Iop_Sqrt64Fx2:
1947 return unary64Fx2(mce, vatom);
1948
1949 case Iop_Sqrt64F0x2:
1950 return unary64F0x2(mce, vatom);
1951
1952 case Iop_Sqrt32Fx4:
1953 case Iop_RSqrt32Fx4:
1954 case Iop_Recip32Fx4:
1955 return unary32Fx4(mce, vatom);
1956
1957 case Iop_Sqrt32F0x4:
1958 case Iop_RSqrt32F0x4:
1959 case Iop_Recip32F0x4:
1960 return unary32F0x4(mce, vatom);
1961
1962 case Iop_32UtoV128:
1963 case Iop_64UtoV128:
1964 return assignNew(mce, Ity_V128, unop(op, vatom));
1965
1966 case Iop_F32toF64:
1967 case Iop_I32StoF64:
1968 case Iop_NegF64:
1969 case Iop_SinF64:
1970 case Iop_CosF64:
1971 case Iop_TanF64:
1972 case Iop_SqrtF64:
1973 case Iop_AbsF64:
1974 case Iop_2xm1F64:
1975 return mkPCastTo(mce, Ity_I64, vatom);
1976
1977 case Iop_Clz32:
1978 case Iop_Ctz32:
1979 return mkPCastTo(mce, Ity_I32, vatom);
1980
1981 case Iop_32Sto64:
1982 case Iop_32Uto64:
1983 case Iop_V128to64:
1984 case Iop_V128HIto64:
1985 return assignNew(mce, Ity_I64, unop(op, vatom));
1986
1987 case Iop_64to32:
1988 case Iop_64HIto32:
1989 case Iop_1Uto32:
1990 case Iop_8Uto32:
1991 case Iop_16Uto32:
1992 case Iop_16Sto32:
1993 case Iop_8Sto32:
1994 return assignNew(mce, Ity_I32, unop(op, vatom));
1995
1996 case Iop_8Sto16:
1997 case Iop_8Uto16:
1998 case Iop_32to16:
1999 case Iop_32HIto16:
2000 return assignNew(mce, Ity_I16, unop(op, vatom));
2001
2002 case Iop_1Uto8:
2003 case Iop_16to8:
2004 case Iop_32to8:
2005 return assignNew(mce, Ity_I8, unop(op, vatom));
2006
2007 case Iop_32to1:
2008 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
2009
2010 case Iop_ReinterpF64asI64:
2011 case Iop_ReinterpI64asF64:
2012 case Iop_ReinterpI32asF32:
2013 case Iop_NotV128:
2014 case Iop_Not64:
2015 case Iop_Not32:
2016 case Iop_Not16:
2017 case Iop_Not8:
2018 case Iop_Not1:
2019 return vatom;
2020
2021 default:
2022 ppIROp(op);
2023 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2024 }
2025 }
2026
2027
2028 /* Worker function; do not call directly. */
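/* Reads the V bits for a ty-sized value at addr+bias out of shadow
   memory, by emitting a dirty call to the matching
   MC_(helperc_LOADV*) helper. */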
2029 static
2030 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2031 {
2032 void* helper;
2033 HChar* hname;
2034 IRDirty* di;
2035 IRTemp datavbits;
2036 IRAtom* addrAct;
2037
2038 tl_assert(isOriginalAtom(mce,addr));
2039
2040 /* First, emit a definedness test for the address. This also sets
2041 the address (shadow) to 'defined' following the test. */
2042 complainIfUndefined( mce, addr );
2043
2044 /* Now cook up a call to the relevant helper function, to read the
2045 data V bits from shadow memory. */
2046 ty = shadowType(ty);
2047 switch (ty) {
2048 case Ity_I64: helper = &MC_(helperc_LOADV8);
2049 hname = "MC_(helperc_LOADV8)";
2050 break;
2051 case Ity_I32: helper = &MC_(helperc_LOADV4);
2052 hname = "MC_(helperc_LOADV4)";
2053 break;
2054 case Ity_I16: helper = &MC_(helperc_LOADV2);
2055 hname = "MC_(helperc_LOADV2)";
2056 break;
2057 case Ity_I8: helper = &MC_(helperc_LOADV1);
2058 hname = "MC_(helperc_LOADV1)";
2059 break;
2060 default: ppIRType(ty);
2061 VG_(tool_panic)("memcheck:do_shadow_LDle");
2062 }
2063
2064 /* Generate the actual address into addrAct. */
2065 if (bias == 0) {
2066 addrAct = addr;
2067 } else {
2068 IROp mkAdd;
2069 IRAtom* eBias;
2070 IRType tyAddr = mce->hWordTy;
2071 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2072 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2073 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2074 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2075 }
2076
2077 /* We need to have a place to park the V bits we're just about to
2078 read. */
2079 datavbits = newIRTemp(mce->bb->tyenv, ty);
2080 di = unsafeIRDirty_1_N( datavbits,
2081 1/*regparms*/, hname, helper,
2082 mkIRExprVec_1( addrAct ));
2083 setHelperAnns( mce, di );
2084 stmt( mce->bb, IRStmt_Dirty(di) );
2085
2086 return mkexpr(datavbits);
2087 }
2088
2089
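/* Shadow-load dispatcher: integer sizes go straight to the worker;
   a V128 load is split into two 64-bit shadow loads (low half at
   'bias', high half at 'bias+8') and reassembled with 64HLtoV128. */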
2090 static
2091 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2092 {
2093 IRAtom *v64hi, *v64lo;
2094 switch (shadowType(ty)) {
2095 case Ity_I8:
2096 case Ity_I16:
2097 case Ity_I32:
2098 case Ity_I64:
2099 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2100 case Ity_V128:
2101 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2102 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2103 return assignNew( mce,
2104 Ity_V128,
2105 binop(Iop_64HLtoV128, v64hi, v64lo));
2106 default:
2107 VG_(tool_panic)("expr2vbits_LDle");
2108 }
2109 }
2110
2111
2112 static
2113 IRAtom* expr2vbits_ITE ( MCEnv* mce,
2114 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
2115 {
2116 IRAtom *vbitsC, *vbits0, *vbits1;
2117 IRType ty;
2118 /* Given ITE(cond,iftrue,iffalse), generate
2119 ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
2120 That is, steer the V bits like the originals, but trash the
2121 result if the steering value is undefined. This gives
2122 lazy propagation. */
2123 tl_assert(isOriginalAtom(mce, cond));
2124 tl_assert(isOriginalAtom(mce, iftrue));
2125 tl_assert(isOriginalAtom(mce, iffalse));
2126
2127 vbitsC = expr2vbits(mce, cond);
2128 vbits0 = expr2vbits(mce, iffalse);
2129 vbits1 = expr2vbits(mce, iftrue);
2130 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2131
2132 return
2133 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
2134 mkPCastTo(mce, ty, vbitsC) );
2135 }
2136
2137 /* --------- This is the main expression-handling function. --------- */
2138
2139 static
2140 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2141 {
2142 switch (e->tag) {
2143
2144 case Iex_Get:
2145 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2146
2147 case Iex_GetI:
2148 return shadow_GETI( mce, e->Iex.GetI.descr,
2149 e->Iex.GetI.ix, e->Iex.GetI.bias );
2150
2151 case Iex_RdTmp:
2152 return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2153
2154 case Iex_Const:
2155 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2156
2157 case Iex_Binop:
2158 return expr2vbits_Binop(
2159 mce,
2160 e->Iex.Binop.op,
2161 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2162 );
2163
2164 case Iex_Unop:
2165 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2166
2167 case Iex_Load:
2168 return expr2vbits_LDle( mce, e->Iex.Load.ty,
2169 e->Iex.Load.addr, 0/*addr bias*/ );
2170
2171 case Iex_CCall:
2172 return mkLazyN( mce, e->Iex.CCall.args,
2173 e->Iex.CCall.retty,
2174 e->Iex.CCall.cee );
2175
2176 case Iex_ITE:
2177 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
2178 e->Iex.ITE.iffalse);
2179
2180 default:
2181 VG_(printf)("\n");
2182 ppIRExpr(e);
2183 VG_(printf)("\n");
2184 VG_(tool_panic)("memcheck: expr2vbits");
2185 }
2186 }
2187
2188 /*------------------------------------------------------------*/
2189 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
2190 /*------------------------------------------------------------*/
2191
2192 /* Widen a value to the host word size. */
2193
2194 static
2195 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2196 {
2197 IRType ty, tyH;
2198
2199    /* vatom is a vbits value and as such can only have a shadow type. */
2200 tl_assert(isShadowAtom(mce,vatom));
2201
2202 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2203 tyH = mce->hWordTy;
2204
2205 if (tyH == Ity_I32) {
2206 switch (ty) {
2207 case Ity_I32: return vatom;
2208 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2209 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2210 default: goto unhandled;
2211 }
2212 } else {
2213 goto unhandled;
2214 }
2215 unhandled:
2216 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2217 VG_(tool_panic)("zwidenToHostWord");
2218 }
2219
2220
2221 /* Generate a shadow store.  addr is always the original address atom.
2222    For the data you can pass in either the original atom (data) or its
2223    V-bits (vdata), but not both. */
2224
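/* Note: as with loads, a V128 store is split into two 64-bit helper
   calls, low half at 'bias' and high half at 'bias+8'. */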
2225 static
2226 void do_shadow_STle ( MCEnv* mce,
2227 IRAtom* addr, UInt bias,
2228 IRAtom* data, IRAtom* vdata )
2229 {
2230 IROp mkAdd;
2231 IRType ty, tyAddr;
2232 IRDirty *di, *diLo64, *diHi64;
2233 IRAtom *addrAct, *addrLo64, *addrHi64;
2234 IRAtom *vdataLo64, *vdataHi64;
2235 IRAtom *eBias, *eBias0, *eBias8;
2236 void* helper = NULL;
2237 HChar* hname = NULL;
2238
2239 tyAddr = mce->hWordTy;
2240 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2241 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2242
2243 di = diLo64 = diHi64 = NULL;
2244 eBias = eBias0 = eBias8 = NULL;
2245 addrAct = addrLo64 = addrHi64 = NULL;
2246 vdataLo64 = vdataHi64 = NULL;
2247
2248 if (data) {
2249 tl_assert(!vdata);
2250 tl_assert(isOriginalAtom(mce, data));
2251 tl_assert(bias == 0);
2252 vdata = expr2vbits( mce, data );
2253 } else {
2254 tl_assert(vdata);
2255 }
2256
2257 tl_assert(isOriginalAtom(mce,addr));
2258 tl_assert(isShadowAtom(mce,vdata));
2259
2260 ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2261
2262 /* First, emit a definedness test for the address. This also sets
2263 the address (shadow) to 'defined' following the test. */
2264 complainIfUndefined( mce, addr );
2265
2266 /* Now decide which helper function to call to write the data V
2267 bits into shadow memory. */
2268 switch (ty) {
2269 case Ity_V128: /* we'll use the helper twice */
2270 case Ity_I64: helper = &MC_(helperc_STOREV8);
2271 hname = "MC_(helperc_STOREV8)";
2272 break;
2273 case Ity_I32: helper = &MC_(helperc_STOREV4);
2274 hname = "MC_(helperc_STOREV4)";
2275 break;
2276 case Ity_I16: helper = &MC_(helperc_STOREV2);
2277 hname = "MC_(helperc_STOREV2)";
2278 break;
2279 case Ity_I8: helper = &MC_(helperc_STOREV1);
2280 hname = "MC_(helperc_STOREV1)";
2281 break;
2282 default: VG_(tool_panic)("memcheck:do_shadow_STle");
2283 }
2284
2285 if (ty == Ity_V128) {
2286
2287 /* V128-bit case */
2288 /* See comment in next clause re 64-bit regparms */
2289 eBias0 = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2290 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2291 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2292 diLo64 = unsafeIRDirty_0_N(
2293 1/*regparms*/, hname, helper,
2294 mkIRExprVec_2( addrLo64, vdataLo64 ));
2295
2296 eBias8 = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2297 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2298 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2299 diHi64 = unsafeIRDirty_0_N(
2300 1/*regparms*/, hname, helper,
2301 mkIRExprVec_2( addrHi64, vdataHi64 ));
2302
2303 setHelperAnns( mce, diLo64 );
2304 setHelperAnns( mce, diHi64 );
2305 stmt( mce->bb, IRStmt_Dirty(diLo64) );
2306 stmt( mce->bb, IRStmt_Dirty(diHi64) );
2307
2308 } else {
2309
2310 /* 8/16/32/64-bit cases */
2311 /* Generate the actual address into addrAct. */
2312 if (bias == 0) {
2313 addrAct = addr;
2314 } else {
2315 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2316 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2317 }
2318
2319 if (ty == Ity_I64) {
2320 /* We can't do this with regparm 2 on 32-bit platforms, since
2321 the back ends aren't clever enough to handle 64-bit
2322 regparm args. Therefore be different. */
2323 di = unsafeIRDirty_0_N(
2324 1/*regparms*/, hname, helper,
2325 mkIRExprVec_2( addrAct, vdata ));
2326 } else {
2327 di = unsafeIRDirty_0_N(
2328 2/*regparms*/, hname, helper,
2329 mkIRExprVec_2( addrAct,
2330 zwidenToHostWord( mce, vdata )));
2331 }
2332 setHelperAnns( mce, di );
2333 stmt( mce->bb, IRStmt_Dirty(di) );
2334 }
2335
2336 }
2337
2338
2339 /* Do lazy pessimistic propagation through a dirty helper call, by
2340 looking at the annotations on it. This is the most complex part of
2341 Memcheck. */
2342
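/* Map a size in bytes (1, 2, 4 or 8) to the corresponding integer
   IRType. */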
2343 static IRType szToITy ( Int n )
2344 {
2345 switch (n) {
2346 case 1: return Ity_I8;
2347 case 2: return Ity_I16;
2348 case 4: return Ity_I32;
2349 case 8: return Ity_I64;
2350 default: VG_(tool_panic)("szToITy(memcheck)");
2351 }
2352 }
2353
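/* Instrument a dirty helper call: check the guard, then UifU together
   PCast-to-I32 summaries of every non-masked argument, every guest
   state slice read and every memory chunk read.  That single 32-bit
   summary is then PCast back out to the destination temporary, to any
   guest state written and to any memory written. */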
2354 static
2355 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2356 {
2357 Int i, n, offset, toDo, gSz, gOff;
2358 IRAtom *src, *here, *curr;
2359 IRType tyAddr, tySrc, tyDst;
2360 IRTemp dst;
2361
2362 /* First check the guard. */
2363 complainIfUndefined(mce, d->guard);
2364
2365 /* Now round up all inputs and PCast over them. */
2366 curr = definedOfType(Ity_I32);
2367
2368 /* Inputs: unmasked args */
2369 for (i = 0; d->args[i]; i++) {
2370 if (d->cee->mcx_mask & (1<<i)) {
2371 /* ignore this arg */
2372 } else {
2373 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2374 curr = mkUifU32(mce, here, curr);
2375 }
2376 }
2377
2378 /* Inputs: guest state that we read. */
2379 for (i = 0; i < d->nFxState; i++) {
2380 tl_assert(d->fxState[i].fx != Ifx_None);
2381 if (d->fxState[i].fx == Ifx_Write)
2382 continue;
2383
2384 /* Ignore any sections marked as 'always defined'. */
2385 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2386 if (0)
2387 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2388 d->fxState[i].offset, d->fxState[i].size );
2389 continue;
2390 }
2391
2392 /* This state element is read or modified. So we need to
2393 consider it. If larger than 8 bytes, deal with it in 8-byte
2394 chunks. */
2395 gSz = d->fxState[i].size;
2396 gOff = d->fxState[i].offset;
2397 tl_assert(gSz > 0);
2398 while (True) {
2399 if (gSz == 0) break;
2400 n = gSz <= 8 ? gSz : 8;
2401 /* update 'curr' with UifU of the state slice
2402 gOff .. gOff+n-1 */
2403 tySrc = szToITy( n );
2404 src = assignNew( mce, tySrc,
2405 shadow_GET(mce, gOff, tySrc ) );
2406 here = mkPCastTo( mce, Ity_I32, src );
2407 curr = mkUifU32(mce, here, curr);
2408 gSz -= n;
2409 gOff += n;
2410 }
2411
2412 }
2413
2414 /* Inputs: memory. First set up some info needed regardless of
2415 whether we're doing reads or writes. */
2416 tyAddr = Ity_INVALID;
2417
2418 if (d->mFx != Ifx_None) {
2419 /* Because we may do multiple shadow loads/stores from the same
2420 base address, it's best to do a single test of its
2421 definedness right now. Post-instrumentation optimisation
2422 should remove all but this test. */
2423 tl_assert(d->mAddr);
2424 complainIfUndefined(mce, d->mAddr);
2425
2426 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2427 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2428 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2429 }
2430
2431 /* Deal with memory inputs (reads or modifies) */
2432 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2433 offset = 0;
2434 toDo = d->mSize;
2435 /* chew off 32-bit chunks */
2436 while (toDo >= 4) {
2437 here = mkPCastTo(
2438 mce, Ity_I32,
2439 expr2vbits_LDle ( mce, Ity_I32,
2440 d->mAddr, d->mSize - toDo )
2441 );
2442 curr = mkUifU32(mce, here, curr);
2443 toDo -= 4;
2444 }
2445 /* chew off 16-bit chunks */
2446 while (toDo >= 2) {
2447 here = mkPCastTo(
2448 mce, Ity_I32,
2449 expr2vbits_LDle ( mce, Ity_I16,
2450 d->mAddr, d->mSize - toDo )
2451 );
2452 curr = mkUifU32(mce, here, curr);
2453 toDo -= 2;
2454 }
2455 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2456 }
2457
2458 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2459 all the inputs to the helper. Now we need to re-distribute the
2460 results to all destinations. */
2461
2462 /* Outputs: the destination temporary, if there is one. */
2463 if (d->tmp != IRTemp_INVALID) {
2464 dst = findShadowTmp(mce, d->tmp);
2465 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2466 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2467 }
2468
2469 /* Outputs: guest state that we write or modify. */
2470 for (i = 0; i < d->nFxState; i++) {
2471 tl_assert(d->fxState[i].fx != Ifx_None);
2472 if (d->fxState[i].fx == Ifx_Read)
2473 continue;
2474 /* Ignore any sections marked as 'always defined'. */
2475 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2476 continue;
2477 /* This state element is written or modified. So we need to
2478 consider it. If larger than 8 bytes, deal with it in 8-byte
2479 chunks. */
2480 gSz = d->fxState[i].size;
2481 gOff = d->fxState[i].offset;
2482 tl_assert(gSz > 0);
2483 while (True) {
2484 if (gSz == 0) break;
2485 n = gSz <= 8 ? gSz : 8;
2486 /* Write suitably-casted 'curr' to the state slice
2487 gOff .. gOff+n-1 */
2488 tyDst = szToITy( n );
2489 do_shadow_PUT( mce, gOff,
2490 NULL, /* original atom */
2491 mkPCastTo( mce, tyDst, curr ) );
2492 gSz -= n;
2493 gOff += n;
2494 }
2495 }
2496
2497 /* Outputs: memory that we write or modify. */
2498 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2499 offset = 0;
2500 toDo = d->mSize;
2501 /* chew off 32-bit chunks */
2502 while (toDo >= 4) {
2503 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2504 NULL, /* original data */
2505 mkPCastTo( mce, Ity_I32, curr ) );
2506 toDo -= 4;
2507 }
2508 /* chew off 16-bit chunks */
2509 while (toDo >= 2) {
2510 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2511 NULL, /* original data */
2512 mkPCastTo( mce, Ity_I16, curr ) );
2513 toDo -= 2;
2514 }
2515 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2516 }
2517
2518 }
2519
2520
2521 /*------------------------------------------------------------*/
2522 /*--- Memcheck main ---*/
2523 /*------------------------------------------------------------*/
2524
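/* Heuristic: does this atom mention one of a small set of 'magic'
   constants characteristic of word-at-a-time string code (e.g.
   optimised strlen)?  Used only by the bogus-literal scan below,
   which is itself disabled in mc_instrument. */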
2525 static Bool isBogusAtom ( IRAtom* at )
2526 {
2527 ULong n = 0;
2528 IRConst* con;
2529 tl_assert(isIRAtom(at));
2530 if (at->tag == Iex_RdTmp)
2531 return False;
2532 tl_assert(at->tag == Iex_Const);
2533 con = at->Iex.Const.con;
2534 switch (con->tag) {
2535 case Ico_U8: n = (ULong)con->Ico.U8; break;
2536 case Ico_U16: n = (ULong)con->Ico.U16; break;
2537 case Ico_U32: n = (ULong)con->Ico.U32; break;
2538 case Ico_U64: n = (ULong)con->Ico.U64; break;
2539 default: ppIRExpr(at); tl_assert(0);
2540 }
2541 /* VG_(printf)("%llx\n", n); */
2542 return (n == 0xFEFEFEFF
2543 || n == 0x80808080
2544 || n == 0x1010101
2545 || n == 1010100);
2546 }
2547
2548 __attribute__((unused))
2549 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2550 {
2551 Int i;
2552 IRExpr* e;
2553 switch (st->tag) {
2554 case Ist_WrTmp:
2555 e = st->Ist.WrTmp.data;
2556 switch (e->tag) {
2557 case Iex_Get:
2558 case Iex_RdTmp:
2559 return False;
2560 case Iex_Unop:
2561 return isBogusAtom(e->Iex.Unop.arg);
2562 case Iex_Binop:
2563 return isBogusAtom(e->Iex.Binop.arg1)
2564 || isBogusAtom(e->Iex.Binop.arg2);
2565 case Iex_ITE:
2566 return isBogusAtom(e->Iex.ITE.cond)
2567 || isBogusAtom(e->Iex.ITE.iftrue)
2568 || isBogusAtom(e->Iex.ITE.iffalse);
2569 case Iex_Load:
2570 return isBogusAtom(e->Iex.Load.addr);
2571 case Iex_CCall:
2572 for (i = 0; e->Iex.CCall.args[i]; i++)
2573 if (isBogusAtom(e->Iex.CCall.args[i]))
2574 return True;
2575 return False;
2576 default:
2577 goto unhandled;
2578 }
2579 case Ist_Put:
2580 return isBogusAtom(st->Ist.Put.data);
2581 case Ist_Store:
2582 return isBogusAtom(st->Ist.Store.addr)
2583 || isBogusAtom(st->Ist.Store.data);
2584 case Ist_Exit:
2585 return isBogusAtom(st->Ist.Exit.guard);
2586 default:
2587 unhandled:
2588 ppIRStmt(st);
2589         VG_(tool_panic)("checkForBogusLiterals");
2590 }
2591 }
2592
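/* Top-level instrumentation pass: walk the flat input superblock,
   emit shadow statements for each original statement (shadow tmp
   assignments, shadow PUT/PUTI and stores, dirty-call handling,
   guard checks), append the original statement itself, and finally
   complain if the block's jump target is undefined. */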
2593 IRSB* mc_instrument ( void* closureV,
2594 IRSB* bb_in, VexGuestLayout* layout,
2595 VexGuestExtents* vge,
2596 IRType gWordTy, IRType hWordTy )
2597 {
2598 Bool verboze = False; //True;
2599
2600 /* Bool hasBogusLiterals = False; */
2601
2602 Int i, j, first_stmt;
2603 IRStmt* st;
2604 MCEnv mce;
2605
2606 /* Set up BB */
2607 IRSB* bb = emptyIRSB();
2608 bb->tyenv = deepCopyIRTypeEnv(bb_in->tyenv);
2609 bb->next = deepCopyIRExpr(bb_in->next);
2610 bb->jumpkind = bb_in->jumpkind;
2611
2612 /* Set up the running environment. Only .bb is modified as we go
2613 along. */
2614 mce.bb = bb;
2615 mce.layout = layout;
2616 mce.n_originalTmps = bb->tyenv->types_used;
2617 mce.hWordTy = hWordTy;
2618 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2619 for (i = 0; i < mce.n_originalTmps; i++)
2620 mce.tmpMap[i] = IRTemp_INVALID;
2621
2622 /* Iterate over the stmts. */
2623
2624 for (i = 0; i < bb_in->stmts_used; i++) {
2625 st = bb_in->stmts[i];
2626 if (!st) continue;
2627
2628 tl_assert(isFlatIRStmt(st));
2629
2630 /*
2631 if (!hasBogusLiterals) {
2632 hasBogusLiterals = checkForBogusLiterals(st);
2633 if (hasBogusLiterals) {
2634 VG_(printf)("bogus: ");
2635 ppIRStmt(st);
2636 VG_(printf)("\n");
2637 }
2638 }
2639 */
2640 first_stmt = bb->stmts_used;
2641
2642 if (verboze) {
2643 ppIRStmt(st);
2644 VG_(printf)("\n\n");
2645 }
2646
2647 switch (st->tag) {
2648
2649 case Ist_WrTmp:
2650 assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2651 expr2vbits( &mce, st->Ist.WrTmp.data) );
2652 break;
2653
2654 case Ist_Put:
2655 do_shadow_PUT( &mce,
2656 st->Ist.Put.offset,
2657 st->Ist.Put.data,
2658 NULL /* shadow atom */ );
2659 break;
2660
2661 case Ist_PutI:
2662 do_shadow_PUTI( &mce,
2663 st->Ist.PutI.details->descr,
2664 st->Ist.PutI.details->ix,
2665 st->Ist.PutI.details->bias,
2666 st->Ist.PutI.details->data );
2667 break;
2668
2669 case Ist_Store:
2670 do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2671 st->Ist.Store.data,
2672 NULL /* shadow data */ );
2673 break;
2674
2675 case Ist_Exit:
2676 /* if (!hasBogusLiterals) */
2677 complainIfUndefined( &mce, st->Ist.Exit.guard );
2678 break;
2679
2680 case Ist_Dirty:
2681 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2682 break;
2683
2684 case Ist_IMark:
2685 case Ist_NoOp:
2686 break;
2687
2688 default:
2689 VG_(printf)("\n");
2690 ppIRStmt(st);
2691 VG_(printf)("\n");
2692 VG_(tool_panic)("memcheck: unhandled IRStmt");
2693
2694 } /* switch (st->tag) */
2695
2696 if (verboze) {
2697 for (j = first_stmt; j < bb->stmts_used; j++) {
2698 VG_(printf)(" ");
2699 ppIRStmt(bb->stmts[j]);
2700 VG_(printf)("\n");
2701 }
2702 VG_(printf)("\n");
2703 }
2704
2705 addStmtToIRSB(bb, st);
2706
2707 }
2708
2709 /* Now we need to complain if the jump target is undefined. */
2710 first_stmt = bb->stmts_used;
2711
2712 if (verboze) {
2713 VG_(printf)("bb->next = ");
2714 ppIRExpr(bb->next);
2715 VG_(printf)("\n\n");
2716 }
2717
2718 complainIfUndefined( &mce, bb->next );
2719
2720 if (verboze) {
2721 for (j = first_stmt; j < bb->stmts_used; j++) {
2722 VG_(printf)(" ");
2723 ppIRStmt(bb->stmts[j]);
2724 VG_(printf)("\n");
2725 }
2726 VG_(printf)("\n");
2727 }
2728
2729 return bb;
2730 }
2731 #endif /* UNUSED */
2732
2733 /*--------------------------------------------------------------------*/
2734 /*--- end test_main.c ---*/
2735 /*--------------------------------------------------------------------*/
2736