
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( HChar* bytes, Int nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* UNUSED */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
   return False;
}
static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.orig\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 60;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 TEST_VSUPPORT, /* valgrind support */
                 &vcon );


   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF, f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF, f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
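      /* So a record in file.orig might look like (hypothetical
         values, for illustration only):
            . 1 8048000 3
            . 01 02 03
         i.e. bb number 1 at guest address 0x8048000, followed by 3
         bytes of code given as space-separated hex pairs. */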
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      /* thumb ITstate analysis needs to examine the 18 bytes
         preceding the first instruction.  So let's leave the first 18
         zeroed out. */
      memset(origbuf, 0, sizeof(origbuf));

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[18 + i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_icache_line_szB = 128;

      LibVEX_default_VexArchInfo(&vai_arm);
      vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;

      LibVEX_default_VexAbiInfo(&vbi);
      vbi.guest_stack_redzone_size = 128;

      /* ----- Set up args for LibVEX_Translate ----- */

      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = &origbuf[18];
      vta.guest_bytes_addr = (Addr64)orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;

#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 0 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
#if 1 /* arm -> arm */
      vta.arch_guest     = VexArchARM;
      vta.archinfo_guest = vai_arm;
      vta.arch_host      = VexArchARM;
      vta.archinfo_host  = vai_arm;
      /* ARM/Thumb-only hacks, needed to keep the ITstate analyser in
         the front end happy.  */
      vta.guest_bytes     = &origbuf[18 + 1];
      vta.guest_bytes_addr = (Addr64)(&origbuf[18 + 1]);
#endif

#if 1 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 0 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
      vta.addProfInc      = False;
      vta.sigill_diag     = True;

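      /* These appear to be placeholder (non-executable) addresses:
         this harness only translates code and never runs the result,
         so the dispatcher entry points below are never reached. */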
      vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
      vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
      vta.disp_cp_xindir             = (void*)0x1234567A;
      vta.disp_cp_xassisted          = (void*)0x1234567B;

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int       i;
   Int       sz;
   IRCallee* helper;
   IRStmt*   st;
   IRExpr*   data;
   IRExpr*   addr;
   Bool      needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                                                  needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                                               needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/UI64). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

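/* For example (illustrative only), with these macros one can write

      assign(bb, t, binop(Iop_Add32, mkexpr(s), mkU32(1)));

   to append "t = s + 1" to bb, rather than spelling out the
   IRStmt_WrTmp/IRExpr_Binop constructors by hand. */
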
/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

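/* Note: x | -x sets every bit from the lowest 1-bit of x upwards
   (since -x == ~x + 1).  Applied to a shadow value this smears
   undefinedness "leftwards": e.g. for I8, Left(0x04) = 0xFC, so all
   bits at and above the lowest undefined bit become undefined. */
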
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo ( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

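/* Worked example: pessimistically casting an I8 shadow to I32 yields
   0x00000000 when the I8 shadow is 0x00 (fully defined), and
   0xFFFFFFFF for any nonzero I8 shadow, since CmpNE8 collapses the
   shadow to one bit and 1Sto32 sign-extends that bit across the
   whole word. */
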

/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
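
/* Example: if the layout declared bytes [16..19] always-defined,
   a 4-byte slice at offset 16 returns True, one at offset 20 returns
   False, and one at offset 18 hits the partial-overlap panic. */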


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing; otherwise
   write the V bits for the supplied atom to the shadow area.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
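
/* E.g. an mcx_mask of 0x5 (binary 101) would exclude args 0 and 2
   from the definedness merge above. */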


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

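/* Sketch of the idea: a_min/b_min force every undefined operand bit
   to 0 and a_max/b_max force every undefined operand bit to 1, so
   the two sums bracket the values the real sum could take.  Any
   result bit that differs between a_min+b_min and a_max+b_max may be
   affected by an unknown carry, and is flagged undefined along with
   the directly undefined bits qaa|qbb. */
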
static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                                     IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType ty;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop( opOR,
             assignNew(mce,ty, binop(opOR, qaa, qbb)),
             assignNew(mce,ty,
                binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                             assignNew(mce,ty, binop(opADD, a_max, b_max))
                )
             )
      )
   );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

1398 
1399 /* Here's a simple scheme capable of handling ops derived from SSE1
1400    code and while only generating ops that can be efficiently
1401    implemented in SSE1. */
1402 
1403 /* All-lanes versions are straightforward:
1404 
1405    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1406 
1407    unary32Fx4(x,y)    ==> PCast32x4(x#)
1408 
1409    Lowest-lane-only versions are more complex:
1410 
1411    binary32F0x4(x,y)  ==> SetV128lo32(
1412                              x#,
1413                              PCast32(V128to32(UifUV128(x#,y#)))
1414                           )
1415 
1416    This is perhaps not so obvious.  In particular, it's faster to
1417    do a V128-bit UifU and then take the bottom 32 bits than the more
1418    obvious scheme of taking the bottom 32 bits of each operand
1419    and doing a 32-bit UifU.  Basically since UifU is fast and
1420    chopping lanes off vector values is slow.
1421 
1422    Finally:
1423 
1424    unary32F0x4(x)     ==> SetV128lo32(
1425                              x#,
1426                              PCast32(V128to32(x#))
1427                           )
1428 
1429    Where:
1430 
1431    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1432    PCast32x4(v#) = CmpNEZ32x4(v#)
1433 */
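
/* Concretely, then, for an op such as Add32Fx4 the instrumentation
   below amounts to

      Add32Fx4(x,y)#  =  CmpNEZ32x4( UifUV128(x#, y#) )

   where UifUV128 is, in Memcheck's usual scheme, just a bitwise
   OrV128 of the two shadow values. */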

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is less simple.  Consider a lane in either arg,
   vatom1 or vatom2; it doesn't matter which.

   After the PCast, that lane is all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   In short: pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
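
/* To see why the PCast really is needed, consider what would happen
   without it (illustrative values only).  Take a 16-bit lane whose
   shadow is 0x0100, i.e. only bit 8 is undefined.  Feeding that
   shadow straight through Iop_QNarrowBin16Sto8Sx16 would treat it as
   the value 256 and saturate it to 0x7F -- a result lane which
   wrongly claims bit 7 is defined and bits 0..6 are undefined.
   PCasting first turns the lane into 0xFFFF, which narrows to 0xFF:
   every bit of the result lane is correctly flagged undefined. */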


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
      case Iop_QAdd32Ux4:
      case Iop_QAdd32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_QSub64Ux2:
      case Iop_QSub64Sx2:
      case Iop_Add64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      //    case Iop_RoundF64:
      case Iop_F64toI64S:
      case Iop_I64StoF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32S:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

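      /* Widening multiplies: the low half of the result shadow is the
         usual cheap left-smeared approximation of the inputs' UifU;
         and since undefinedness anywhere in the inputs can in
         principle disturb any bit of the high half, the high half is
         simply an all-or-nothing PCast of that low-half shadow.  The
         two halves are then glued back together at double width. */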
      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

      case Iop_Add32:
#        if 0
         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
#        endif
      case Iop_Sub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount:
            each result bit then carries exactly the V bit of the
            operand bit it came from. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

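      /* The And/Or scheme, in outline (this is the standard Memcheck
         rule): a result bit of And is defined not just when both
         input bits are defined, but also when either input bit is a
         *defined 0*, since that forces the result to 0 regardless of
         the other input; dually, Or is rescued by a defined 1.
         Below, uifu joins the two shadows pessimistically, each
         improve() term is clear exactly at the bit positions its
         operand rescues, and difd intersects undefinedness -- so the
         expression reads: undefined iff undefined-in-either and not
         rescued by either side. */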
      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

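      /* Reinterpretations and bitwise NOT carry the V bits across
         unchanged: moving a bit between representations, or negating
         it, does not alter whether it is defined. */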
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   HChar*   hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


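/* Shadow loads.  There is no V128 load helper, so (for this
   little-endian load, as the LDle name indicates) a V128 load is
   split into two 64-bit shadow loads -- the low half at addr+bias,
   the high half at addr+bias+8 -- and the halves are glued back
   together with Iop_64HLtoV128(hi, lo). */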
static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_ITE ( MCEnv* mce,
                         IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
{
   IRAtom *vbitsC, *vbits0, *vbits1;
   IRType ty;
   /* Given ITE(cond,iftrue,iffalse), generate
         ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, iftrue));
   tl_assert(isOriginalAtom(mce, iffalse));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, iffalse);
   vbits1 = expr2vbits(mce, iftrue);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
                      mkPCastTo(mce, ty, vbitsC) );
}
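
/* The two extreme cases, for illustration: if cond is fully defined,
   PCast(cond#) is all 0s and the UifU leaves the selected branch's V
   bits untouched; if any bit of cond is undefined, PCast(cond#) is
   all 1s and the whole result is marked undefined, however defined
   the branch values are.  This is lazier (and cheaper) than
   complaining about the condition at the ITE itself. */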

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_LDle( mce, e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_ITE:
         return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
                                e->Iex.ITE.iffalse);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both.  */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */
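
/* In outline (a summary of what the code below does): the V bits of
   every input the helper is annotated as touching -- its unmasked
   arguments, any guest state it reads, and any memory it reads -- are
   PCast-ed to I32 and UifU-ed together into a single worst-case
   definedness summary.  That summary is then PCast-ed back out to
   every output: the return temporary, any guest state written, and
   any memory written. */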

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8; break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
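   /* These look like the magic constants that word-at-a-time string
      routines (e.g. an optimised strlen) are built from: 0xFEFEFEFF
      is -0x01010101 modulo 2^32, and 0x80808080 is the matching
      high-bit mask.  Their appearance as literals is a strong hint
      that the guest code inspects several bytes at once -- exactly
      the kind of code that makes definedness tracking complain about
      benign reads. */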
   return (n == 0xFEFEFEFF
           || n == 0x80808080
           || n == 0x1010101
           || n == 0x1010100);
}

__attribute__((unused))
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int     i;
   IRExpr* e;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("hasBogusLiterals");
   }
}

IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */
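
   /* For each original statement, the matching shadow statements are
      generated first (each case below plants IR into 'bb'), and the
      original statement itself is then appended afterwards via
      addStmtToIRSB at the bottom of the loop. */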

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.details->descr,
                            st->Ist.PutI.details->ix,
                            st->Ist.PutI.details->bias,
                            st->Ist.PutI.details->data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_IMark:
         case Ist_NoOp:
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRSB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
#endif /* UNUSED */

/*--------------------------------------------------------------------*/
/*--- end                                              test_main.c ---*/
/*--------------------------------------------------------------------*/