
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( const HChar* bytes, SizeT nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* UNUSED */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

static Bool chase_into_not_ok ( void* opaque, Addr dst ) {
   return False;
}
static UInt needs_self_check ( void *closureV, VexRegisterUpdates *pxControl,
                               const VexGuestExtents *vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm, vai_mips32, vai_mips64;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.orig\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 60;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 &vcon );


   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF, f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF, f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      /* thumb ITstate analysis needs to examine the 18 bytes
         preceding the first instruction.  So let's leave the first 18
         zeroed out. */
      memset(origbuf, 0, sizeof(origbuf));

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[18 + i] = (UChar)u;
      }
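
      /* Illustrative input fragment (not from any real dump; the
         bytes below are just a plausible x86 sequence) -- each BB
         contributes a header line and a byte line:

            . 1 8048430 7
            . 55 89 e5 83 ec 08 c9

         i.e. BB number 1, guest address 0x8048430, 7 bytes of code,
         given in hex.  A typical invocation would then be something
         like "./vex test1.orig". */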

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
      vai_x86.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;
      vai_amd64.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_icache_line_szB = 128;

      LibVEX_default_VexArchInfo(&vai_arm);
      vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;

      LibVEX_default_VexArchInfo(&vai_mips32);
      vai_mips32.endness = VexEndnessLE;
      vai_mips32.hwcaps = VEX_PRID_COMP_MIPS;

      LibVEX_default_VexArchInfo(&vai_mips64);
      vai_mips64.endness = VexEndnessLE;

      LibVEX_default_VexAbiInfo(&vbi);
      vbi.guest_stack_redzone_size = 128;

      /* ----- Set up args for LibVEX_Translate ----- */

      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = &origbuf[18];
      vta.guest_bytes_addr = orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;

#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 0 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
#if 1 /* x86 -> mips32 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchMIPS32;
      vta.archinfo_host  = vai_mips32;
#endif
#if 0 /* amd64 -> mips64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchMIPS64;
      vta.archinfo_host  = vai_mips64;
#endif
#if 0 /* arm -> arm */
      vta.arch_guest     = VexArchARM;
      vta.archinfo_guest = vai_arm;
      vta.arch_host      = VexArchARM;
      vta.archinfo_host  = vai_arm;
      /* ARM/Thumb-only hacks, needed to keep the ITstate analyser
         in the front end happy. */
      vta.guest_bytes     = &origbuf[18 + 1];
      vta.guest_bytes_addr = (Addr) &origbuf[18 + 1];
#endif

#if 1 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 0 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
      vta.addProfInc      = False;
      vta.sigill_diag     = True;

      vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
      vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
      vta.disp_cp_xindir             = (void*)0x1234567A;
      vta.disp_cp_xassisted          = (void*)0x1234567B;

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int       i;
   Int       sz;
   IRCallee* helper;
   IRStmt*   st;
   IRExpr*   data;
   IRExpr*   addr;
   Bool      needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                                                  needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                                               needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.
         Initially all entries are IRTemp_INVALID.  Entries are added
         lazily since many original temps are not used due to
         optimisation prior to instrumentation.  Note that floating
         point original tmps are shadowed by integer tmps of the same
         size, and Bit-typed original tmps are shadowed by the type
         Ity_I8.  See comment below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_originalTmps-1], which gives the current
   shadow for each original tmp, or INVALID_IRTEMP if none is so far
   assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
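
/* For instance (illustrative numbering only): if original tmp t5 is
   first shadowed by tmp t17, then tmpMap[5] == t17.  If t17 is later
   tested and forced to 'defined', a fresh shadow t23 is allocated
   and tmpMap[5] is updated to t23, preserving SSA form. */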

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:      VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
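
/* For example (hypothetical tmps t and s), the fragment

      assign(mce->bb, t, binop(Iop_Add32, mkU32(1), mkexpr(s)));

   is shorthand for building the flat statement t = Add32(0x1:I32, s)
   and appending it to the bb under construction. */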

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

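/* mkLeftN(vbits) = vbits | -vbits: 'smear' any undefined (1) bit
   leftwards, so every bit position at or above the lowest 1 becomes
   1.  E.g. (illustrative) mkLeft8 of 0x04 yields 0xFC.  This models
   ops such as add/sub, where a carry out of an undefined bit can
   taint all higher-order result bits. */
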
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
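/* Worked example (illustrative): for a bitwise AND, a *defined 0*
   data bit forces that result bit to 0, so the result bit is defined
   there regardless of the other operand.  With data = 0x30 and
   vbits = 0x0F, mkImproveAND8 gives 0x3F: only bits 7:6, the defined
   0s, can rescue the result.  The improvement terms are combined with
   the plain UifU of the operands' vbits via the DifD operators above,
   elsewhere in the instrumenter (not shown in this extract). */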
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
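/* Worked example (illustrative): dually, for a bitwise OR a
   *defined 1* data bit forces that result bit to 1 regardless of the
   other operand.  With data = 0xC0 and vbits = 0x0F, mkImproveOR8
   gives ~0xC0 | 0x0F = 0x3F: bits 7:6 (the defined 1s) come out as
   0, marking those result bits as defined. */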
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

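/* A pessimising cast resizes vbits with maximum pessimism: the
   source is first collapsed to a single 'any bit undefined?' bit,
   which is then sign-extended to the destination size.  E.g.
   (illustrative) mkPCastTo(mce, Ity_I32, vbits8) yields 0x00000000
   when vbits8 == 0, and 0xFFFFFFFF for any nonzero vbits8. */
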
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
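
/* E.g. (illustrative): if layout declares bytes [64 .. 71] as always
   defined, then a query for (offset=64, size=8) returns True, a query
   for (offset=0, size=4) returns False, and (offset=68, size=8)
   straddles the section boundary and panics. */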


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}

/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                                     IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType ty;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop( opOR,
             assignNew(mce,ty, binop(opOR, qaa, qbb)),
             assignNew(mce,ty,
                binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                             assignNew(mce,ty, binop(opADD, a_max, b_max))
                )
             )
      )
   );
}
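
/* Worked example (illustrative): aa = bb = 0x00000001, with qaa =
   0x00000001 (bit 0 of aa undefined) and qbb = 0.  Then a_min = 0,
   a_max = 1 and b_min = b_max = 1, so (a_min + b_min) ^ (a_max +
   b_max) = 1 ^ 2 = 3, giving result vbits 0x00000003: the uncertain
   bit can flip bit 0 and carry into bit 1, but no further.  This is
   tighter than the mkLeft-style approximation, which would taint
   every bit from bit 0 upwards (0xFFFFFFFF here). */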
1391 
1392 
1393 /*------------------------------------------------------------*/
1394 /*--- Helpers for dealing with vector primops.            ---*/
1395 /*------------------------------------------------------------*/
1396 
1397 /* Vector pessimisation -- pessimise within each lane individually. */
1398 
mkPCast8x16(MCEnv * mce,IRAtom * at)1399 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1400 {
1401    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1402 }
1403 
mkPCast16x8(MCEnv * mce,IRAtom * at)1404 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1405 {
1406    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1407 }
1408 
mkPCast32x4(MCEnv * mce,IRAtom * at)1409 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1410 {
1411    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1412 }
1413 
1414 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1415 {
1416    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1417 }
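
/* Editor's note: an illustrative scalar model, not part of the
   original file, of what the CmpNEZ lane ops above compute per lane:
   a lane with any undefined (1) shadow bit becomes entirely
   undefined, while a fully defined (all-0) lane stays fully defined.
   Shown for a single 32-bit lane: */
__attribute__((unused))
static UInt pcast32_model ( UInt vlane )
{
   return vlane == 0 ? 0x00000000u : 0xFFFFFFFFu;
}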
1418 
1419 
1420 /* Here's a simple scheme capable of handling ops derived from SSE1
1421    code while generating only ops that can be efficiently
1422    implemented in SSE1. */
1423 
1424 /* All-lanes versions are straightforward:
1425 
1426    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1427 
1428    unary32Fx4(x)      ==> PCast32x4(x#)
1429 
1430    Lowest-lane-only versions are more complex:
1431 
1432    binary32F0x4(x,y)  ==> SetV128lo32(
1433                              x#,
1434                              PCast32(V128to32(UifUV128(x#,y#)))
1435                           )
1436 
1437    This is perhaps not so obvious.  In particular, it's faster to
1438    do a V128-bit UifU and then take the bottom 32 bits than the
1439    more obvious scheme of taking the bottom 32 bits of each
1440    operand and doing a 32-bit UifU, because UifU is fast and
1441    chopping lanes off vector values is slow.
1442 
1443    Finally:
1444 
1445    unary32F0x4(x)     ==> SetV128lo32(
1446                              x#,
1447                              PCast32(V128to32(x#))
1448                           )
1449 
1450    Where:
1451 
1452    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1453    PCast32x4(v#) = CmpNEZ32x4(v#)
1454 */
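
/* Editor's note: an illustrative sketch, not part of the original
   file, of the lowest-lane-only rule above, modelling a V128 shadow
   as four UInt lanes (lane 0 = lowest).  Only lane 0 carries a new
   FP result, so only lane 0's shadow is recomputed; lanes 1..3 pass
   x# through unchanged, exactly as SetV128lo32 does.  Uses the
   illustrative pcast32_model helper defined earlier. */
__attribute__((unused))
static void binary32F0x4_model ( UInt vres[4],
                                 const UInt vx[4], const UInt vy[4] )
{
   Int i;
   for (i = 0; i < 4; i++)
      vres[i] = vx[i];                       /* x#, lanes 1..3 kept */
   /* lane 0: bottom 32 bits of UifUV128(x#,y#), then PCast32 */
   vres[0] = pcast32_model(vx[0] | vy[0]);
}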
1455 
1456 static
1457 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1458 {
1459    IRAtom* at;
1460    tl_assert(isShadowAtom(mce, vatomX));
1461    tl_assert(isShadowAtom(mce, vatomY));
1462    at = mkUifUV128(mce, vatomX, vatomY);
1463    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1464    return at;
1465 }
1466 
1467 static
1468 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1469 {
1470    IRAtom* at;
1471    tl_assert(isShadowAtom(mce, vatomX));
1472    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1473    return at;
1474 }
1475 
1476 static
1477 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1478 {
1479    IRAtom* at;
1480    tl_assert(isShadowAtom(mce, vatomX));
1481    tl_assert(isShadowAtom(mce, vatomY));
1482    at = mkUifUV128(mce, vatomX, vatomY);
1483    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1484    at = mkPCastTo(mce, Ity_I32, at);
1485    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1486    return at;
1487 }
1488 
1489 static
1490 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1491 {
1492    IRAtom* at;
1493    tl_assert(isShadowAtom(mce, vatomX));
1494    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1495    at = mkPCastTo(mce, Ity_I32, at);
1496    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1497    return at;
1498 }
1499 
1500 /* --- ... and ... 64Fx2 versions of the same ... --- */
1501 
1502 static
1503 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1504 {
1505    IRAtom* at;
1506    tl_assert(isShadowAtom(mce, vatomX));
1507    tl_assert(isShadowAtom(mce, vatomY));
1508    at = mkUifUV128(mce, vatomX, vatomY);
1509    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1510    return at;
1511 }
1512 
1513 static
1514 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1515 {
1516    IRAtom* at;
1517    tl_assert(isShadowAtom(mce, vatomX));
1518    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1519    return at;
1520 }
1521 
1522 static
1523 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1524 {
1525    IRAtom* at;
1526    tl_assert(isShadowAtom(mce, vatomX));
1527    tl_assert(isShadowAtom(mce, vatomY));
1528    at = mkUifUV128(mce, vatomX, vatomY);
1529    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1530    at = mkPCastTo(mce, Ity_I64, at);
1531    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1532    return at;
1533 }
1534 
1535 static
1536 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1537 {
1538    IRAtom* at;
1539    tl_assert(isShadowAtom(mce, vatomX));
1540    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1541    at = mkPCastTo(mce, Ity_I64, at);
1542    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1543    return at;
1544 }
1545 
1546 /* --- --- Vector saturated narrowing --- --- */
1547 
1548 /* This is quite subtle.  What to do is simple:
1549 
1550    Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
1551 
1552       the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1553 
1554    Why this is right is not so simple.  Consider a lane in either
1555    arg, vatom1 or vatom2; it doesn't matter which.
1556 
1557    After the PCast, that lane is all 0s (defined) or all
1558    1s (undefined).
1559 
1560    Both signed and unsigned saturating narrowing of all 0s produces
1561    all 0s, which is what we want.
1562 
1563    The all-1s case is more complex.  Unsigned narrowing interprets an
1564    all-1s input as the largest unsigned integer, and so produces all
1565    1s as a result since that is the largest unsigned value at the
1566    smaller width.
1567 
1568    Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
1569    to -1, so we still wind up with all 1s at the smaller width.
1570 
1571    In short: pessimise the args, then apply the original narrowing
1572    op.
1573 */
1574 static
1575 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1576                           IRAtom* vatom1, IRAtom* vatom2)
1577 {
1578    IRAtom *at1, *at2, *at3;
1579    IRAtom* (*pcast)( MCEnv*, IRAtom* );
1580    switch (narrow_op) {
1581       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1582       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
1583       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
1584       default: VG_(tool_panic)("vectorNarrowV128");
1585    }
1586    tl_assert(isShadowAtom(mce,vatom1));
1587    tl_assert(isShadowAtom(mce,vatom2));
1588    at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1589    at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1590    at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1591    return at3;
1592 }
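
/* Editor's note: an illustrative worked check, not part of the
   original file, of the narrowing argument above, for one 16-to-8
   lane.  After the PCast a shadow lane is 0x0000 or 0xFFFF; for the
   scheme to be sound, saturating narrowing must map these to 0x00
   and 0xFF respectively -- which both flavours do. */
__attribute__((unused))
static UChar qnarrow16Uto8_model ( UShort x )
{
   /* unsigned saturation: 0xFFFF (max) -> 0xFF, 0x0000 -> 0x00 */
   return x > 0xFF ? 0xFF : (UChar)x;
}
__attribute__((unused))
static UChar qnarrow16Sto8S_model ( Short x )
{
   /* signed saturation: 0xFFFF is -1, and -1 narrows exactly to
      8-bit -1, i.e. 0xFF; 0x0000 narrows to 0x00 */
   if (x > 127)  return 0x7F;
   if (x < -128) return (UChar)0x80;
   return (UChar)x;
}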
1593 
1594 
1595 /* --- --- Vector integer arithmetic --- --- */
1596 
1597 /* Simple ... UifU the args and per-lane pessimise the results. */
1598 static
1599 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1600 {
1601    IRAtom* at;
1602    at = mkUifUV128(mce, vatom1, vatom2);
1603    at = mkPCast8x16(mce, at);
1604    return at;
1605 }
1606 
1607 static
1608 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1609 {
1610    IRAtom* at;
1611    at = mkUifUV128(mce, vatom1, vatom2);
1612    at = mkPCast16x8(mce, at);
1613    return at;
1614 }
1615 
1616 static
1617 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1618 {
1619    IRAtom* at;
1620    at = mkUifUV128(mce, vatom1, vatom2);
1621    at = mkPCast32x4(mce, at);
1622    return at;
1623 }
1624 
1625 static
1626 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1627 {
1628    IRAtom* at;
1629    at = mkUifUV128(mce, vatom1, vatom2);
1630    at = mkPCast64x2(mce, at);
1631    return at;
1632 }
1633 
1634 
1635 /*------------------------------------------------------------*/
1636 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
1637 /*------------------------------------------------------------*/
1638 
1639 static
1640 IRAtom* expr2vbits_Binop ( MCEnv* mce,
1641                            IROp op,
1642                            IRAtom* atom1, IRAtom* atom2 )
1643 {
1644    IRType  and_or_ty;
1645    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
1646    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
1647    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1648 
1649    IRAtom* vatom1 = expr2vbits( mce, atom1 );
1650    IRAtom* vatom2 = expr2vbits( mce, atom2 );
1651 
1652    tl_assert(isOriginalAtom(mce,atom1));
1653    tl_assert(isOriginalAtom(mce,atom2));
1654    tl_assert(isShadowAtom(mce,vatom1));
1655    tl_assert(isShadowAtom(mce,vatom2));
1656    tl_assert(sameKindedAtoms(atom1,vatom1));
1657    tl_assert(sameKindedAtoms(atom2,vatom2));
1658    switch (op) {
1659 
1660       /* V128-bit SIMD (SSE2-esque) */
1661 
1662       case Iop_ShrN16x8:
1663       case Iop_ShrN32x4:
1664       case Iop_ShrN64x2:
1665       case Iop_SarN16x8:
1666       case Iop_SarN32x4:
1667       case Iop_ShlN16x8:
1668       case Iop_ShlN32x4:
1669       case Iop_ShlN64x2:
1670          /* Same scheme as with all other shifts. */
1671          complainIfUndefined(mce, atom2);
1672          return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1673 
1674       case Iop_QSub8Ux16:
1675       case Iop_QSub8Sx16:
1676       case Iop_Sub8x16:
1677       case Iop_Min8Ux16:
1678       case Iop_Max8Ux16:
1679       case Iop_CmpGT8Sx16:
1680       case Iop_CmpEQ8x16:
1681       case Iop_Avg8Ux16:
1682       case Iop_QAdd8Ux16:
1683       case Iop_QAdd8Sx16:
1684       case Iop_Add8x16:
1685          return binary8Ix16(mce, vatom1, vatom2);
1686 
1687       case Iop_QSub16Ux8:
1688       case Iop_QSub16Sx8:
1689       case Iop_Sub16x8:
1690       case Iop_Mul16x8:
1691       case Iop_MulHi16Sx8:
1692       case Iop_MulHi16Ux8:
1693       case Iop_Min16Sx8:
1694       case Iop_Max16Sx8:
1695       case Iop_CmpGT16Sx8:
1696       case Iop_CmpEQ16x8:
1697       case Iop_Avg16Ux8:
1698       case Iop_QAdd16Ux8:
1699       case Iop_QAdd16Sx8:
1700       case Iop_Add16x8:
1701          return binary16Ix8(mce, vatom1, vatom2);
1702 
1703       case Iop_Sub32x4:
1704       case Iop_QSub32Sx4:
1705       case Iop_QSub32Ux4:
1706       case Iop_CmpGT32Sx4:
1707       case Iop_CmpEQ32x4:
1708       case Iop_Add32x4:
1709       case Iop_QAdd32Ux4:
1710       case Iop_QAdd32Sx4:
1711          return binary32Ix4(mce, vatom1, vatom2);
1712 
1713       case Iop_Sub64x2:
1714       case Iop_QSub64Ux2:
1715       case Iop_QSub64Sx2:
1716       case Iop_Add64x2:
1717       case Iop_QAdd64Ux2:
1718       case Iop_QAdd64Sx2:
1719          return binary64Ix2(mce, vatom1, vatom2);
1720 
1721       case Iop_QNarrowBin32Sto16Sx8:
1722       case Iop_QNarrowBin16Sto8Sx16:
1723       case Iop_QNarrowBin16Sto8Ux16:
1724          return vectorNarrowV128(mce, op, vatom1, vatom2);
1725 
1726       case Iop_Sub64Fx2:
1727       case Iop_Mul64Fx2:
1728       case Iop_Min64Fx2:
1729       case Iop_Max64Fx2:
1730       case Iop_Div64Fx2:
1731       case Iop_CmpLT64Fx2:
1732       case Iop_CmpLE64Fx2:
1733       case Iop_CmpEQ64Fx2:
1734       case Iop_Add64Fx2:
1735          return binary64Fx2(mce, vatom1, vatom2);
1736 
1737       case Iop_Sub64F0x2:
1738       case Iop_Mul64F0x2:
1739       case Iop_Min64F0x2:
1740       case Iop_Max64F0x2:
1741       case Iop_Div64F0x2:
1742       case Iop_CmpLT64F0x2:
1743       case Iop_CmpLE64F0x2:
1744       case Iop_CmpEQ64F0x2:
1745       case Iop_Add64F0x2:
1746          return binary64F0x2(mce, vatom1, vatom2);
1747 
1748       /* V128-bit SIMD (SSE1-esque) */
1749 
1750       case Iop_Sub32Fx4:
1751       case Iop_Mul32Fx4:
1752       case Iop_Min32Fx4:
1753       case Iop_Max32Fx4:
1754       case Iop_Div32Fx4:
1755       case Iop_CmpLT32Fx4:
1756       case Iop_CmpLE32Fx4:
1757       case Iop_CmpEQ32Fx4:
1758       case Iop_Add32Fx4:
1759          return binary32Fx4(mce, vatom1, vatom2);
1760 
1761       case Iop_Sub32F0x4:
1762       case Iop_Mul32F0x4:
1763       case Iop_Min32F0x4:
1764       case Iop_Max32F0x4:
1765       case Iop_Div32F0x4:
1766       case Iop_CmpLT32F0x4:
1767       case Iop_CmpLE32F0x4:
1768       case Iop_CmpEQ32F0x4:
1769       case Iop_Add32F0x4:
1770          return binary32F0x4(mce, vatom1, vatom2);
1771 
1772       /* V128-bit data-steering */
1773       case Iop_SetV128lo32:
1774       case Iop_SetV128lo64:
1775       case Iop_64HLtoV128:
1776       case Iop_InterleaveLO64x2:
1777       case Iop_InterleaveLO32x4:
1778       case Iop_InterleaveLO16x8:
1779       case Iop_InterleaveLO8x16:
1780       case Iop_InterleaveHI64x2:
1781       case Iop_InterleaveHI32x4:
1782       case Iop_InterleaveHI16x8:
1783       case Iop_InterleaveHI8x16:
1784          return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1785 
1786       /* Scalar floating point */
1787 
1788          //      case Iop_RoundF64:
1789       case Iop_F64toI64S:
1790       case Iop_I64StoF64:
1791          /* First arg is I32 (rounding mode), second is F64 or I64
1792             (data). */
1793          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1794 
1795       case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1796          /* Takes two F64 args. */
1797       case Iop_F64toI32S:
1798       case Iop_F64toF32:
1799          /* First arg is I32 (rounding mode), second is F64 (data). */
1800          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1801 
1802       case Iop_F64toI16S:
1803          /* First arg is I32 (rounding mode), second is F64 (data). */
1804          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1805 
1806       case Iop_ScaleF64:
1807       case Iop_Yl2xF64:
1808       case Iop_Yl2xp1F64:
1809       case Iop_PRemF64:
1810       case Iop_AtanF64:
1811       case Iop_AddF64:
1812       case Iop_DivF64:
1813       case Iop_SubF64:
1814       case Iop_MulF64:
1815          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1816 
1817       case Iop_CmpF64:
1818          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1819 
1820       /* non-FP after here */
1821 
1822       case Iop_DivModU64to32:
1823       case Iop_DivModS64to32:
1824          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1825 
1826       case Iop_16HLto32:
1827          return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1828       case Iop_32HLto64:
1829          return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1830 
1831       case Iop_MullS32:
1832       case Iop_MullU32: {
1833          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1834          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1835          return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1836       }
1837 
1838       case Iop_MullS16:
1839       case Iop_MullU16: {
1840          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1841          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1842          return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1843       }
1844 
1845       case Iop_MullS8:
1846       case Iop_MullU8: {
1847          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1848          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1849          return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1850       }
1851 
1852       case Iop_Add32:
1853 #        if 0
1854          return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
1855 #        endif
1856       case Iop_Sub32:
1857       case Iop_Mul32:
1858          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1859 
1860       case Iop_Mul16:
1861       case Iop_Add16:
1862       case Iop_Sub16:
1863          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1864 
1865       case Iop_Sub8:
1866       case Iop_Add8:
1867          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1868 
1869       case Iop_CmpLE32S: case Iop_CmpLE32U:
1870       case Iop_CmpLT32U: case Iop_CmpLT32S:
1871       case Iop_CmpEQ32: case Iop_CmpNE32:
1872          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1873 
1874       case Iop_CmpEQ16: case Iop_CmpNE16:
1875          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1876 
1877       case Iop_CmpEQ8: case Iop_CmpNE8:
1878          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1879 
1880       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1881          /* Complain if the shift amount is undefined.  Then simply
1882             shift the first arg's V bits by the real shift amount. */
1883          complainIfUndefined(mce, atom2);
1884          return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1885 
1886       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1887          /* Same scheme as with 32-bit shifts. */
1888          complainIfUndefined(mce, atom2);
1889          return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1890 
1891       case Iop_Shl8: case Iop_Shr8:
1892          /* Same scheme as with 32-bit shifts. */
1893          complainIfUndefined(mce, atom2);
1894          return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1895 
1896       case Iop_Shl64: case Iop_Shr64:
1897          /* Same scheme as with 32-bit shifts. */
1898          complainIfUndefined(mce, atom2);
1899          return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1900 
1901       case Iop_AndV128:
1902          uifu = mkUifUV128; difd = mkDifDV128;
1903          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1904       case Iop_And64:
1905          uifu = mkUifU64; difd = mkDifD64;
1906          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1907       case Iop_And32:
1908          uifu = mkUifU32; difd = mkDifD32;
1909          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1910       case Iop_And16:
1911          uifu = mkUifU16; difd = mkDifD16;
1912          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1913       case Iop_And8:
1914          uifu = mkUifU8; difd = mkDifD8;
1915          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1916 
1917       case Iop_OrV128:
1918          uifu = mkUifUV128; difd = mkDifDV128;
1919          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1920       case Iop_Or64:
1921          uifu = mkUifU64; difd = mkDifD64;
1922          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1923       case Iop_Or32:
1924          uifu = mkUifU32; difd = mkDifD32;
1925          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1926       case Iop_Or16:
1927          uifu = mkUifU16; difd = mkDifD16;
1928          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1929       case Iop_Or8:
1930          uifu = mkUifU8; difd = mkDifD8;
1931          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1932 
1933       do_And_Or:
1934          return
1935          assignNew(
1936             mce,
1937             and_or_ty,
1938             difd(mce, uifu(mce, vatom1, vatom2),
1939                       difd(mce, improve(mce, atom1, vatom1),
1940                                 improve(mce, atom2, vatom2) ) ) );
1941 
1942       case Iop_Xor8:
1943          return mkUifU8(mce, vatom1, vatom2);
1944       case Iop_Xor16:
1945          return mkUifU16(mce, vatom1, vatom2);
1946       case Iop_Xor32:
1947          return mkUifU32(mce, vatom1, vatom2);
1948       case Iop_Xor64:
1949          return mkUifU64(mce, vatom1, vatom2);
1950       case Iop_XorV128:
1951          return mkUifUV128(mce, vatom1, vatom2);
1952 
1953       default:
1954          ppIROp(op);
1955          VG_(tool_panic)("memcheck:expr2vbits_Binop");
1956    }
1957 }
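
/* Editor's note: an illustrative scalar sketch, not part of the
   original file, of the do_And_Or scheme above, shown for And32 and
   assuming the definitions used elsewhere in this file: UifU = OR of
   shadows, DifD = AND of shadows, and ImproveAND(data,vbits) =
   data | vbits, which is 0 exactly where the data is a *defined* 0.
   Such a bit forces the AND result to a defined 0 no matter how
   undefined the other operand is, "rescuing" the naive answer. */
__attribute__((unused))
static UInt and32_vbits_model ( UInt a, UInt va, UInt b, UInt vb )
{
   UInt naive = va | vb;        /* uifu: undefined if either input is */
   UInt impA  = a  | va;        /* 0 only where 'a' is a defined 0    */
   UInt impB  = b  | vb;
   return naive & impA & impB;  /* difd the improvements back in      */
}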
1958 
1959 
1960 static
1961 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1962 {
1963    IRAtom* vatom = expr2vbits( mce, atom );
1964    tl_assert(isOriginalAtom(mce,atom));
1965    switch (op) {
1966 
1967       case Iop_Sqrt64Fx2:
1968          return unary64Fx2(mce, vatom);
1969 
1970       case Iop_Sqrt64F0x2:
1971          return unary64F0x2(mce, vatom);
1972 
1973       case Iop_Sqrt32Fx4:
1974       case Iop_RecipEst32Fx4:
1975          return unary32Fx4(mce, vatom);
1976 
1977       case Iop_Sqrt32F0x4:
1978       case Iop_RSqrtEst32F0x4:
1979       case Iop_RecipEst32F0x4:
1980          return unary32F0x4(mce, vatom);
1981 
1982       case Iop_32UtoV128:
1983       case Iop_64UtoV128:
1984          return assignNew(mce, Ity_V128, unop(op, vatom));
1985 
1986       case Iop_F32toF64:
1987       case Iop_I32StoF64:
1988       case Iop_NegF64:
1989       case Iop_SinF64:
1990       case Iop_CosF64:
1991       case Iop_TanF64:
1992       case Iop_SqrtF64:
1993       case Iop_AbsF64:
1994       case Iop_2xm1F64:
1995          return mkPCastTo(mce, Ity_I64, vatom);
1996 
1997       case Iop_Clz32:
1998       case Iop_Ctz32:
1999          return mkPCastTo(mce, Ity_I32, vatom);
2000 
2001       case Iop_32Sto64:
2002       case Iop_32Uto64:
2003       case Iop_V128to64:
2004       case Iop_V128HIto64:
2005          return assignNew(mce, Ity_I64, unop(op, vatom));
2006 
2007       case Iop_64to32:
2008       case Iop_64HIto32:
2009       case Iop_1Uto32:
2010       case Iop_8Uto32:
2011       case Iop_16Uto32:
2012       case Iop_16Sto32:
2013       case Iop_8Sto32:
2014          return assignNew(mce, Ity_I32, unop(op, vatom));
2015 
2016       case Iop_8Sto16:
2017       case Iop_8Uto16:
2018       case Iop_32to16:
2019       case Iop_32HIto16:
2020          return assignNew(mce, Ity_I16, unop(op, vatom));
2021 
2022       case Iop_1Uto8:
2023       case Iop_16to8:
2024       case Iop_32to8:
2025          return assignNew(mce, Ity_I8, unop(op, vatom));
2026 
2027       case Iop_32to1:
2028          return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
2029 
2030       case Iop_ReinterpF64asI64:
2031       case Iop_ReinterpI64asF64:
2032       case Iop_ReinterpI32asF32:
2033       case Iop_NotV128:
2034       case Iop_Not64:
2035       case Iop_Not32:
2036       case Iop_Not16:
2037       case Iop_Not8:
2038       case Iop_Not1:
2039          return vatom;
2040 
2041       default:
2042          ppIROp(op);
2043          VG_(tool_panic)("memcheck:expr2vbits_Unop");
2044    }
2045 }
2046 
2047 
2048 /* Worker function; do not call directly. */
2049 static
2050 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2051 {
2052    void*    helper;
2053    HChar*   hname;
2054    IRDirty* di;
2055    IRTemp   datavbits;
2056    IRAtom*  addrAct;
2057 
2058    tl_assert(isOriginalAtom(mce,addr));
2059 
2060    /* First, emit a definedness test for the address.  This also sets
2061       the address (shadow) to 'defined' following the test. */
2062    complainIfUndefined( mce, addr );
2063 
2064    /* Now cook up a call to the relevant helper function, to read the
2065       data V bits from shadow memory. */
2066    ty = shadowType(ty);
2067    switch (ty) {
2068       case Ity_I64: helper = &MC_(helperc_LOADV8);
2069                     hname = "MC_(helperc_LOADV8)";
2070                     break;
2071       case Ity_I32: helper = &MC_(helperc_LOADV4);
2072                     hname = "MC_(helperc_LOADV4)";
2073                     break;
2074       case Ity_I16: helper = &MC_(helperc_LOADV2);
2075                     hname = "MC_(helperc_LOADV2)";
2076                     break;
2077       case Ity_I8:  helper = &MC_(helperc_LOADV1);
2078                     hname = "MC_(helperc_LOADV1)";
2079                     break;
2080       default:      ppIRType(ty);
2081                     VG_(tool_panic)("memcheck:do_shadow_LDle");
2082    }
2083 
2084    /* Generate the actual address into addrAct. */
2085    if (bias == 0) {
2086       addrAct = addr;
2087    } else {
2088       IROp    mkAdd;
2089       IRAtom* eBias;
2090       IRType  tyAddr  = mce->hWordTy;
2091       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2092       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2093       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2094       addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2095    }
2096 
2097    /* We need to have a place to park the V bits we're just about to
2098       read. */
2099    datavbits = newIRTemp(mce->bb->tyenv, ty);
2100    di = unsafeIRDirty_1_N( datavbits,
2101                            1/*regparms*/, hname, helper,
2102                            mkIRExprVec_1( addrAct ));
2103    setHelperAnns( mce, di );
2104    stmt( mce->bb, IRStmt_Dirty(di) );
2105 
2106    return mkexpr(datavbits);
2107 }
2108 
2109 
2110 static
2111 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
2112 {
2113    IRAtom *v64hi, *v64lo;
2114    switch (shadowType(ty)) {
2115       case Ity_I8:
2116       case Ity_I16:
2117       case Ity_I32:
2118       case Ity_I64:
2119          return expr2vbits_LDle_WRK(mce, ty, addr, bias);
2120       case Ity_V128:
2121          v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
2122          v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
2123          return assignNew( mce,
2124                            Ity_V128,
2125                            binop(Iop_64HLtoV128, v64hi, v64lo));
2126       default:
2127          VG_(tool_panic)("expr2vbits_LDle");
2128    }
2129 }
2130 
2131 
2132 static
2133 IRAtom* expr2vbits_ITE ( MCEnv* mce,
2134                          IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
2135 {
2136    IRAtom *vbitsC, *vbits0, *vbits1;
2137    IRType ty;
2138    /* Given ITE(cond,iftrue,iffalse), generate
2139          ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
2140       That is, steer the V bits like the originals, but trash the
2141       result if the steering value is undefined.  This gives
2142       lazy propagation. */
2143    tl_assert(isOriginalAtom(mce, cond));
2144    tl_assert(isOriginalAtom(mce, iftrue));
2145    tl_assert(isOriginalAtom(mce, iffalse));
2146 
2147    vbitsC = expr2vbits(mce, cond);
2148    vbits0 = expr2vbits(mce, iffalse);
2149    vbits1 = expr2vbits(mce, iftrue);
2150    ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2151 
2152    return
2153       mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
2154                       mkPCastTo(mce, ty, vbitsC) );
2155 }
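
/* Editor's note: an illustrative scalar sketch, not part of the
   original file, of the ITE scheme above, for 32-bit data.  'vc' is
   the condition's shadow; pcast32_model is the illustrative helper
   defined earlier.  Steer the V bits like the data, then UifU in the
   pessimised condition shadow, so an undefined condition trashes the
   whole result. */
__attribute__((unused))
static UInt ite_vbits_model ( Bool cond, UInt vc, UInt v1, UInt v0 )
{
   UInt steered = cond ? v1 : v0;
   return steered | pcast32_model(vc);
}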
2156 
2157 /* --------- This is the main expression-handling function. --------- */
2158 
2159 static
2160 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2161 {
2162    switch (e->tag) {
2163 
2164       case Iex_Get:
2165          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2166 
2167       case Iex_GetI:
2168          return shadow_GETI( mce, e->Iex.GetI.descr,
2169                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
2170 
2171       case Iex_RdTmp:
2172          return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
2173 
2174       case Iex_Const:
2175          return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2176 
2177       case Iex_Binop:
2178          return expr2vbits_Binop(
2179                    mce,
2180                    e->Iex.Binop.op,
2181                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
2182                 );
2183 
2184       case Iex_Unop:
2185          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2186 
2187       case Iex_Load:
2188          return expr2vbits_LDle( mce, e->Iex.Load.ty,
2189                                       e->Iex.Load.addr, 0/*addr bias*/ );
2190 
2191       case Iex_CCall:
2192          return mkLazyN( mce, e->Iex.CCall.args,
2193                               e->Iex.CCall.retty,
2194                               e->Iex.CCall.cee );
2195 
2196       case Iex_ITE:
2197          return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
2198                                 e->Iex.ITE.iffalse);
2199 
2200       default:
2201          VG_(printf)("\n");
2202          ppIRExpr(e);
2203          VG_(printf)("\n");
2204          VG_(tool_panic)("memcheck: expr2vbits");
2205    }
2206 }
2207 
2208 /*------------------------------------------------------------*/
2209 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
2210 /*------------------------------------------------------------*/
2211 
2212 /* Widen a value to the host word size. */
2213 
2214 static
2215 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2216 {
2217    IRType ty, tyH;
2218 
2219    /* vatom is a vbits-value and as such can only have a shadow type. */
2220    tl_assert(isShadowAtom(mce,vatom));
2221 
2222    ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
2223    tyH = mce->hWordTy;
2224 
2225    if (tyH == Ity_I32) {
2226       switch (ty) {
2227          case Ity_I32: return vatom;
2228          case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2229          case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2230          default:      goto unhandled;
2231       }
2232    } else {
2233       goto unhandled;
2234    }
2235   unhandled:
2236    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2237    VG_(tool_panic)("zwidenToHostWord");
2238 }
2239 
2240 
2241 /* Generate a shadow store.  addr is always the original address atom.
2242    You can pass in either originals or V-bits for the data atom, but
2243    obviously not both.  */
2244 
2245 static
2246 void do_shadow_STle ( MCEnv* mce,
2247                       IRAtom* addr, UInt bias,
2248                       IRAtom* data, IRAtom* vdata )
2249 {
2250    IROp     mkAdd;
2251    IRType   ty, tyAddr;
2252    IRDirty  *di, *diLo64, *diHi64;
2253    IRAtom   *addrAct, *addrLo64, *addrHi64;
2254    IRAtom   *vdataLo64, *vdataHi64;
2255    IRAtom   *eBias, *eBias0, *eBias8;
2256    void*    helper = NULL;
2257    HChar*   hname = NULL;
2258 
2259    tyAddr = mce->hWordTy;
2260    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2261    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2262 
2263    di = diLo64 = diHi64 = NULL;
2264    eBias = eBias0 = eBias8 = NULL;
2265    addrAct = addrLo64 = addrHi64 = NULL;
2266    vdataLo64 = vdataHi64 = NULL;
2267 
2268    if (data) {
2269       tl_assert(!vdata);
2270       tl_assert(isOriginalAtom(mce, data));
2271       tl_assert(bias == 0);
2272       vdata = expr2vbits( mce, data );
2273    } else {
2274       tl_assert(vdata);
2275    }
2276 
2277    tl_assert(isOriginalAtom(mce,addr));
2278    tl_assert(isShadowAtom(mce,vdata));
2279 
2280    ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2281 
2282    /* First, emit a definedness test for the address.  This also sets
2283       the address (shadow) to 'defined' following the test. */
2284    complainIfUndefined( mce, addr );
2285 
2286    /* Now decide which helper function to call to write the data V
2287       bits into shadow memory. */
2288    switch (ty) {
2289       case Ity_V128: /* we'll use the helper twice */
2290       case Ity_I64: helper = &MC_(helperc_STOREV8);
2291                     hname = "MC_(helperc_STOREV8)";
2292                     break;
2293       case Ity_I32: helper = &MC_(helperc_STOREV4);
2294                     hname = "MC_(helperc_STOREV4)";
2295                     break;
2296       case Ity_I16: helper = &MC_(helperc_STOREV2);
2297                     hname = "MC_(helperc_STOREV2)";
2298                     break;
2299       case Ity_I8:  helper = &MC_(helperc_STOREV1);
2300                     hname = "MC_(helperc_STOREV1)";
2301                     break;
2302       default:      VG_(tool_panic)("memcheck:do_shadow_STle");
2303    }
2304 
2305    if (ty == Ity_V128) {
2306 
2307       /* V128-bit case */
2308       /* See comment in next clause re 64-bit regparms */
2309       eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
2310       addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2311       vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2312       diLo64    = unsafeIRDirty_0_N(
2313                      1/*regparms*/, hname, helper,
2314                      mkIRExprVec_2( addrLo64, vdataLo64 ));
2315 
2316       eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2317       addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2318       vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2319       diHi64    = unsafeIRDirty_0_N(
2320                      1/*regparms*/, hname, helper,
2321                      mkIRExprVec_2( addrHi64, vdataHi64 ));
2322 
2323       setHelperAnns( mce, diLo64 );
2324       setHelperAnns( mce, diHi64 );
2325       stmt( mce->bb, IRStmt_Dirty(diLo64) );
2326       stmt( mce->bb, IRStmt_Dirty(diHi64) );
2327 
2328    } else {
2329 
2330       /* 8/16/32/64-bit cases */
2331       /* Generate the actual address into addrAct. */
2332       if (bias == 0) {
2333          addrAct = addr;
2334       } else {
2335          eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2336          addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2337       }
2338 
2339       if (ty == Ity_I64) {
2340          /* We can't do this with regparm 2 on 32-bit platforms, since
2341             the back ends aren't clever enough to handle 64-bit
2342             regparm args.  Therefore handle this case specially. */
2343          di = unsafeIRDirty_0_N(
2344                  1/*regparms*/, hname, helper,
2345                  mkIRExprVec_2( addrAct, vdata ));
2346       } else {
2347          di = unsafeIRDirty_0_N(
2348                  2/*regparms*/, hname, helper,
2349                  mkIRExprVec_2( addrAct,
2350                                 zwidenToHostWord( mce, vdata )));
2351       }
2352       setHelperAnns( mce, di );
2353       stmt( mce->bb, IRStmt_Dirty(di) );
2354    }
2355 
2356 }
2357 
2358 
2359 /* Do lazy pessimistic propagation through a dirty helper call, by
2360    looking at the annotations on it.  This is the most complex part of
2361    Memcheck. */
2362 
2363 static IRType szToITy ( Int n )
2364 {
2365    switch (n) {
2366       case 1: return Ity_I8;
2367       case 2: return Ity_I16;
2368       case 4: return Ity_I32;
2369       case 8: return Ity_I64;
2370       default: VG_(tool_panic)("szToITy(memcheck)");
2371    }
2372 }
2373 
2374 static
2375 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2376 {
2377    Int     i, n, offset, toDo, gSz, gOff;
2378    IRAtom  *src, *here, *curr;
2379    IRType  tyAddr, tySrc, tyDst;
2380    IRTemp  dst;
2381 
2382    /* First check the guard. */
2383    complainIfUndefined(mce, d->guard);
2384 
2385    /* Now round up all inputs and PCast over them. */
2386    curr = definedOfType(Ity_I32);
2387 
2388    /* Inputs: unmasked args */
2389    for (i = 0; d->args[i]; i++) {
2390       if (d->cee->mcx_mask & (1<<i)) {
2391          /* ignore this arg */
2392       } else {
2393          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2394          curr = mkUifU32(mce, here, curr);
2395       }
2396    }
2397 
2398    /* Inputs: guest state that we read. */
2399    for (i = 0; i < d->nFxState; i++) {
2400       tl_assert(d->fxState[i].fx != Ifx_None);
2401       if (d->fxState[i].fx == Ifx_Write)
2402          continue;
2403 
2404       /* Ignore any sections marked as 'always defined'. */
2405       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2406          if (0)
2407          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2408                      d->fxState[i].offset, d->fxState[i].size );
2409          continue;
2410       }
2411 
2412       /* This state element is read or modified.  So we need to
2413          consider it.  If larger than 8 bytes, deal with it in 8-byte
2414          chunks. */
2415       gSz  = d->fxState[i].size;
2416       gOff = d->fxState[i].offset;
2417       tl_assert(gSz > 0);
2418       while (True) {
2419          if (gSz == 0) break;
2420          n = gSz <= 8 ? gSz : 8;
2421          /* update 'curr' with UifU of the state slice
2422             gOff .. gOff+n-1 */
2423          tySrc = szToITy( n );
2424          src   = assignNew( mce, tySrc,
2425                             shadow_GET(mce, gOff, tySrc ) );
2426          here = mkPCastTo( mce, Ity_I32, src );
2427          curr = mkUifU32(mce, here, curr);
2428          gSz -= n;
2429          gOff += n;
2430       }
2431 
2432    }
2433 
2434    /* Inputs: memory.  First set up some info needed regardless of
2435       whether we're doing reads or writes. */
2436    tyAddr = Ity_INVALID;
2437 
2438    if (d->mFx != Ifx_None) {
2439       /* Because we may do multiple shadow loads/stores from the same
2440          base address, it's best to do a single test of its
2441          definedness right now.  Post-instrumentation optimisation
2442          should remove all but this test. */
2443       tl_assert(d->mAddr);
2444       complainIfUndefined(mce, d->mAddr);
2445 
2446       tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2447       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2448       tl_assert(tyAddr == mce->hWordTy); /* not really right */
2449    }
2450 
2451    /* Deal with memory inputs (reads or modifies) */
2452    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2453       offset = 0;
2454       toDo   = d->mSize;
2455       /* chew off 32-bit chunks */
2456       while (toDo >= 4) {
2457          here = mkPCastTo(
2458                    mce, Ity_I32,
2459                    expr2vbits_LDle ( mce, Ity_I32,
2460                                      d->mAddr, d->mSize - toDo )
2461                 );
2462          curr = mkUifU32(mce, here, curr);
2463          toDo -= 4;
2464       }
2465       /* chew off 16-bit chunks */
2466       while (toDo >= 2) {
2467          here = mkPCastTo(
2468                    mce, Ity_I32,
2469                    expr2vbits_LDle ( mce, Ity_I16,
2470                                      d->mAddr, d->mSize - toDo )
2471                 );
2472          curr = mkUifU32(mce, here, curr);
2473          toDo -= 2;
2474       }
2475       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2476    }
2477 
2478    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
2479       all the inputs to the helper.  Now we need to re-distribute the
2480       results to all destinations. */
2481 
2482    /* Outputs: the destination temporary, if there is one. */
2483    if (d->tmp != IRTemp_INVALID) {
2484       dst   = findShadowTmp(mce, d->tmp);
2485       tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2486       assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2487    }
2488 
2489    /* Outputs: guest state that we write or modify. */
2490    for (i = 0; i < d->nFxState; i++) {
2491       tl_assert(d->fxState[i].fx != Ifx_None);
2492       if (d->fxState[i].fx == Ifx_Read)
2493          continue;
2494       /* Ignore any sections marked as 'always defined'. */
2495       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2496          continue;
2497       /* This state element is written or modified.  So we need to
2498          consider it.  If larger than 8 bytes, deal with it in 8-byte
2499          chunks. */
2500       gSz  = d->fxState[i].size;
2501       gOff = d->fxState[i].offset;
2502       tl_assert(gSz > 0);
2503       while (True) {
2504          if (gSz == 0) break;
2505          n = gSz <= 8 ? gSz : 8;
2506          /* Write suitably-casted 'curr' to the state slice
2507             gOff .. gOff+n-1 */
2508          tyDst = szToITy( n );
2509          do_shadow_PUT( mce, gOff,
2510                              NULL, /* original atom */
2511                              mkPCastTo( mce, tyDst, curr ) );
2512          gSz -= n;
2513          gOff += n;
2514       }
2515    }
2516 
2517    /* Outputs: memory that we write or modify. */
2518    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2519       offset = 0;
2520       toDo   = d->mSize;
2521       /* chew off 32-bit chunks */
2522       while (toDo >= 4) {
2523          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2524                          NULL, /* original data */
2525                          mkPCastTo( mce, Ity_I32, curr ) );
2526          toDo -= 4;
2527       }
2528       /* chew off 16-bit chunks */
2529       while (toDo >= 2) {
2530          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2531                          NULL, /* original data */
2532                          mkPCastTo( mce, Ity_I16, curr ) );
2533          toDo -= 2;
2534       }
2535       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2536    }
2537 
2538 }
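
/* Editor's note: an illustrative scalar sketch, not part of the
   original file, of the lazy scheme above: each input's shadow is
   pessimised down to a 32-bit word and UifU'd into a single summary,
   which the code above then fans back out (suitably PCast'd) to the
   destination temporary, guest state and memory.  pcast32_model is
   the illustrative helper defined earlier. */
__attribute__((unused))
static UInt dirtySummary_model ( const UInt* input_vbits, Int n_inputs )
{
   Int  i;
   UInt curr = 0;                             /* definedOfType(Ity_I32) */
   for (i = 0; i < n_inputs; i++)
      curr |= pcast32_model(input_vbits[i]);  /* mkUifU32 of a PCast */
   return curr;
}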
2539 
2540 
2541 /*------------------------------------------------------------*/
2542 /*--- Memcheck main                                        ---*/
2543 /*------------------------------------------------------------*/
2544 
2545 static Bool isBogusAtom ( IRAtom* at )
2546 {
2547    ULong n = 0;
2548    IRConst* con;
2549    tl_assert(isIRAtom(at));
2550    if (at->tag == Iex_RdTmp)
2551       return False;
2552    tl_assert(at->tag == Iex_Const);
2553    con = at->Iex.Const.con;
2554    switch (con->tag) {
2555       case Ico_U8:  n = (ULong)con->Ico.U8; break;
2556       case Ico_U16: n = (ULong)con->Ico.U16; break;
2557       case Ico_U32: n = (ULong)con->Ico.U32; break;
2558       case Ico_U64: n = (ULong)con->Ico.U64; break;
2559       default: ppIRExpr(at); tl_assert(0);
2560    }
2561    /* VG_(printf)("%llx\n", n); */
2562    return (n == 0xFEFEFEFF
2563            || n == 0x80808080
2564            || n == 0x1010101
2565            || n == 0x1010100);
2566 }
2567 
2568 __attribute__((unused))
2569 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2570 {
2571    Int     i;
2572    IRExpr* e;
2573    switch (st->tag) {
2574       case Ist_WrTmp:
2575          e = st->Ist.WrTmp.data;
2576          switch (e->tag) {
2577             case Iex_Get:
2578             case Iex_RdTmp:
2579                return False;
2580             case Iex_Unop:
2581                return isBogusAtom(e->Iex.Unop.arg);
2582             case Iex_Binop:
2583                return isBogusAtom(e->Iex.Binop.arg1)
2584                       || isBogusAtom(e->Iex.Binop.arg2);
2585             case Iex_ITE:
2586                return isBogusAtom(e->Iex.ITE.cond)
2587                       || isBogusAtom(e->Iex.ITE.iftrue)
2588                       || isBogusAtom(e->Iex.ITE.iffalse);
2589             case Iex_Load:
2590                return isBogusAtom(e->Iex.Load.addr);
2591             case Iex_CCall:
2592                for (i = 0; e->Iex.CCall.args[i]; i++)
2593                   if (isBogusAtom(e->Iex.CCall.args[i]))
2594                      return True;
2595                return False;
2596             default:
2597                goto unhandled;
2598          }
2599       case Ist_Put:
2600          return isBogusAtom(st->Ist.Put.data);
2601       case Ist_Store:
2602          return isBogusAtom(st->Ist.Store.addr)
2603                 || isBogusAtom(st->Ist.Store.data);
2604       case Ist_Exit:
2605          return isBogusAtom(st->Ist.Exit.guard);
2606       default:
2607       unhandled:
2608          ppIRStmt(st);
2609          VG_(tool_panic)("checkForBogusLiterals");
2610    }
2611 }
2612 
2613 IRSB* mc_instrument ( void* closureV,
2614                       IRSB* bb_in, VexGuestLayout* layout,
2615                       VexGuestExtents* vge,
2616                       IRType gWordTy, IRType hWordTy )
2617 {
2618    Bool verboze = False; //True;
2619 
2620    /* Bool hasBogusLiterals = False; */
2621 
2622    Int i, j, first_stmt;
2623    IRStmt* st;
2624    MCEnv mce;
2625 
2626    /* Set up BB */
2627    IRSB* bb     = emptyIRSB();
2628    bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
2629    bb->next     = deepCopyIRExpr(bb_in->next);
2630    bb->jumpkind = bb_in->jumpkind;
2631 
2632    /* Set up the running environment.  Only .bb is modified as we go
2633       along. */
2634    mce.bb             = bb;
2635    mce.layout         = layout;
2636    mce.n_originalTmps = bb->tyenv->types_used;
2637    mce.hWordTy        = hWordTy;
2638    mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2639    for (i = 0; i < mce.n_originalTmps; i++)
2640       mce.tmpMap[i] = IRTemp_INVALID;
2641 
2642    /* Iterate over the stmts. */
2643 
2644    for (i = 0; i <  bb_in->stmts_used; i++) {
2645       st = bb_in->stmts[i];
2646       if (!st) continue;
2647 
2648       tl_assert(isFlatIRStmt(st));
2649 
2650       /*
2651       if (!hasBogusLiterals) {
2652          hasBogusLiterals = checkForBogusLiterals(st);
2653          if (hasBogusLiterals) {
2654             VG_(printf)("bogus: ");
2655             ppIRStmt(st);
2656             VG_(printf)("\n");
2657          }
2658       }
2659       */
2660       first_stmt = bb->stmts_used;
2661 
2662       if (verboze) {
2663          ppIRStmt(st);
2664          VG_(printf)("\n\n");
2665       }
2666 
2667       switch (st->tag) {
2668 
2669          case Ist_WrTmp:
2670             assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
2671                         expr2vbits( &mce, st->Ist.WrTmp.data) );
2672             break;
2673 
2674          case Ist_Put:
2675             do_shadow_PUT( &mce,
2676                            st->Ist.Put.offset,
2677                            st->Ist.Put.data,
2678                            NULL /* shadow atom */ );
2679             break;
2680 
2681          case Ist_PutI:
2682             do_shadow_PUTI( &mce,
2683                             st->Ist.PutI.details->descr,
2684                             st->Ist.PutI.details->ix,
2685                             st->Ist.PutI.details->bias,
2686                             st->Ist.PutI.details->data );
2687             break;
2688 
2689          case Ist_Store:
2690             do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
2691                                   st->Ist.Store.data,
2692                                   NULL /* shadow data */ );
2693             break;
2694 
2695          case Ist_Exit:
2696             /* if (!hasBogusLiterals) */
2697                complainIfUndefined( &mce, st->Ist.Exit.guard );
2698             break;
2699 
2700          case Ist_Dirty:
2701             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2702             break;
2703 
2704          case Ist_IMark:
2705          case Ist_NoOp:
2706             break;
2707 
2708          default:
2709             VG_(printf)("\n");
2710             ppIRStmt(st);
2711             VG_(printf)("\n");
2712             VG_(tool_panic)("memcheck: unhandled IRStmt");
2713 
2714       } /* switch (st->tag) */
2715 
2716       if (verboze) {
2717          for (j = first_stmt; j < bb->stmts_used; j++) {
2718             VG_(printf)("   ");
2719             ppIRStmt(bb->stmts[j]);
2720             VG_(printf)("\n");
2721          }
2722          VG_(printf)("\n");
2723       }
2724 
2725       addStmtToIRSB(bb, st);
2726 
2727    }
2728 
2729    /* Now we need to complain if the jump target is undefined. */
2730    first_stmt = bb->stmts_used;
2731 
2732    if (verboze) {
2733       VG_(printf)("bb->next = ");
2734       ppIRExpr(bb->next);
2735       VG_(printf)("\n\n");
2736    }
2737 
2738    complainIfUndefined( &mce, bb->next );
2739 
2740    if (verboze) {
2741       for (j = first_stmt; j < bb->stmts_used; j++) {
2742          VG_(printf)("   ");
2743          ppIRStmt(bb->stmts[j]);
2744          VG_(printf)("\n");
2745       }
2746       VG_(printf)("\n");
2747    }
2748 
2749    return bb;
2750 }
2751 #endif /* UNUSED */
2752 
2753 /*--------------------------------------------------------------------*/
2754 /*--- end                                              test_main.c ---*/
2755 /*--------------------------------------------------------------------*/
2756