
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks heap accesses.                              ---*/
/*---                                                     h_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Initial version (Annelid):

   Copyright (C) 2003-2010 Nicholas Nethercote
      njn@valgrind.org

   Valgrind-3.X port:

   Copyright (C) 2008-2010 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

// FIXME: 64-bit cleanness, check the following
// struct _ISNode.ownerCount is 32-bit
// struct _ISNode.topLevel is 32-bit
// or is that not really right now?  add assertion checks about
// the max size of a node

// FIXME: should we shadow %RIP?  Maybe not.

// FIXME: shadows of temporaries created in preamble, a la memcheck?

// FIXME: result of add_new_segment is always ignored

// FIXME: the mechanism involving last_seg_added is really ugly.
// Do something cleaner.

// FIXME: post_reg_write_clientcall: check function pointer comparisons
// are safe on toc-afflicted platforms

// FIXME: tidy up findShadowTmp

// FIXME: post_reg_write_demux(Vg_CoreSysCall) is redundant w.r.t.
// the default 'NONPTR' behaviour of post_syscall.  post_reg_write_demux
// is called first, then post_syscall.

// FIXME: check nothing is mapped in the lowest 1M of memory at
// startup, or quit (to do with nonptr_or_unknown; also sync the 1M
// magic value with the PIE default load address in m_ume.c).

// FIXME: consider whether we could paint memory acquired from
// sys_read etc as NONPTR rather than UNKNOWN.

// XXX: recycle freed segments

//--------------------------------------------------------------
// Metadata:
//   HeapBlock.id :: Seg (stored as heap shadowchunk; always non-zero)
//   MemLoc.aseg  :: Seg (implicitly stored)
//   MemLoc.vseg  :: Seg (explicitly stored as the shadow memory)
//   RegLoc.vseg  :: Seg (explicitly stored as shadow registers)
//
// A Seg is made when new memory is created, eg. with malloc() or mmap().
// There are three other Segs:
//  - NONPTR:  for something that's definitely not a pointer
//  - UNKNOWN: for something that could be a pointer
//  - BOTTOM:  used with pointer differences (see below)
//
// MemLoc.vseg is done at word granularity.  If a pointer is written
// to memory misaligned, the information about it will be lost -- it's
// treated as two sub-word writes to two adjacent words.  This avoids
// certain nasty cases that could arise if we tried to track unaligned
// pointers.  Fortunately, misalignment is rare so we don't lose much
// information this way.
//
// MemLoc.aseg is done at byte granularity, and *implicitly* -- ie. not
// directly accessible like MemLoc.vseg, but only by searching through all
// the segments.  Fortunately, it's mostly checked at LOADs/STOREs;  at that
// point we have a pointer p to the MemLoc m as the other arg of the
// LOAD/STORE, so we can check to see if the p.vseg's range includes m.  If
// not, it's an error and we have to search through all segments to find out
// what m.aseg really is.  That's still pretty fast though, thanks to the
// interval skip-list used.  With syscalls we must also do the skip-list
// search, but only on the first and last bytes touched.
//--------------------------------------------------------------

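// An illustrative sketch only (not part of the tool): roughly how the
// vseg-vs-aseg check at a LOAD/STORE, described above, plays out.
// 'sketch_check_access' is a hypothetical name; the real logic lives
// in the instrumentation further down this file.
#if 0
static void sketch_check_access ( Addr m, Seg* ptr_vseg )
{
   if (is_known_segment(ptr_vseg)) {
      /* Fast check: does the pointer's segment contain m? */
      if (m < ptr_vseg->addr || m >= ptr_vseg->addr + ptr_vseg->szB) {
         /* Error: report it, then search all segments (the slow path)
            to establish what m.aseg really is. */
      }
   }
   /* NONPTR/UNKNOWN/BOTTOM pointers give us no range to check. */
}
#endif
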
//--------------------------------------------------------------
// Assumptions, etc:
// - see comment at top of SK_(instrument)() for how sub-word ops are
//   handled.
//
// - ioctl() and socketcall() (and, eventually, ipc()) are assumed to
//   return non-pointers.
//
// - FPU_W is assumed to never write pointers.
//
// - Assuming none of the post_mem_writes create segments worth tracking.
//
// - Treating mmap'd segments (all! including code) like heap segments.  But
//   their ranges can change, new ones can be created by unmapping parts of
//   old segments, etc.  This nasty behaviour seems never to actually
//   happen -- there are assertions checking it.
//--------------------------------------------------------------

//--------------------------------------------------------------
// What I am checking:
// - Type errors:
//    * ADD, OR, LEA2: error if two pointer inputs.
//    * ADC, SBB: error if one or two pointer inputs.
//    * AND, OR: error if two unequal pointer inputs.
//    * NEG: error if pointer input.
//    * {,i}mul_32_64: error if either input is a pointer.
//    * shldl/shrdl, bsf/bsr: error if any inputs are pointers.
//
// - LOAD, STORE:
//    * ptr.vseg must match ptee.aseg.
//    * ptee.aseg must not be a freed segment.
//
// - syscalls: for those accessing memory, look at first and last bytes:
//    * check first.aseg == last.aseg
//    * check first.aseg and last.aseg are not freed segments.
//
// What I am not checking, that I expected to when I started:
// - AND, XOR: allowing two pointers to be used if both from the same segment,
//   because "xor %r,%r" is commonly used to zero %r, and "test %r,%r"
//   (which is translated with an AND) is common too.
//
// - div_64_32/idiv_64_32 can take pointer inputs for the dividend;
//   division doesn't make sense, but modulo does, and they're done with the
//   same instruction.  (Could try to be super-clever and watch the outputs
//   to see if the quotient is used, but it's not worth it.)
//
// - mul_64_32/imul_64_32 can take pointer inputs for one arg or the
//   other, but not both.  This is because some programs (eg. Mozilla
//   Firebird) multiply pointers in hash routines.
//
// - NEG: can take a pointer.  It happens in glibc in a few places.  I've
//   seen the code, didn't understand it, but it's done deliberately.
//
// What I am not checking/doing, but could, but it would require more
// instrumentation and/or slow things down a bit:
// - SUB: when differencing two pointers, the result is BOTTOM, ie. "don't
//   check".  Could link segments instead; slower but a bit more accurate.
//   Also use BOTTOM when doing (ptr - unknown), which could be a pointer
//   difference with a stack/static pointer.
//
// - PUTF: input should be non-pointer
//
// - arithmetic error messages: eg. for adding two pointers, just giving the
//   segments, not the actual pointers.
//
// What I am not checking, and would be difficult:
// - mmap(...MAP_FIXED...) is not handled specially.  It might be used in
//   ways that fool Ptrcheck into giving false positives.
//
// - syscalls: for those accessing memory, not checking that the asegs of the
//   accessed words match the vseg of the accessing pointer, because the
//   vseg is not easily accessible at the required time (it would require
//   knowing for every syscall which register each arg came in, and looking
//   there).
//
// What I am not checking, and would be difficult, but doesn't matter:
// - free(p): similar to syscalls, not checking that the p.vseg matches the
//   aseg of the first byte in the block.  However, Memcheck does an
//   equivalent "bad free" check using shadow_chunks;  indeed, Ptrcheck could
//   do the same check, but there's no point duplicating functionality.  So
//   no loss, really.
//
// Other:
// - not doing anything with mprotect();  probably not worth the effort.
//--------------------------------------------------------------

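// Illustrative sketch only: the ADD rule from the list above, written
// as a segment-transfer function.  'sketch_do_add' is a hypothetical
// name and the error call is elided; the real logic lives in the
// instrumentation below.
#if 0
static Seg* sketch_do_add ( Seg* seg1, Seg* seg2 )
{
   if (is_known_segment(seg1) && is_known_segment(seg2)) {
      /* two pointer inputs: a type error; the result is unknown */
      /* record the error here */
      return UNKNOWN;
   }
   /* pointer plus non-pointer keeps the pointer's segment */
   if (is_known_segment(seg1)) return seg1;
   if (is_known_segment(seg2)) return seg2;
   /* neither input is a known pointer */
   return (seg1 == UNKNOWN || seg2 == UNKNOWN) ? UNKNOWN : NONPTR;
}
#endif
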
//--------------------------------------------------------------
// Todo:
// - Segments for stack frames.  Would detect (some, large) stack
//   over/under-runs, dangling pointers.
//
// - Segments for static data.  Would detect over/under-runs.  Requires
//   reading debug info.
//--------------------------------------------------------------

//--------------------------------------------------------------
// Some profiling results:
//                                                 twolf   konq    date sz
// 1. started                                              35.0s   14.7
// 2. introduced GETV/PUTV                                 30.2s   10.1
// 3. inlined check_load_or_store                  5.6s    27.5s   10.1
// 4. (made check_load, check_store4 regparm(0))          (27.9s) (11.0)
// 5. um, not sure                                 5.3s    27.3s   10.6
//    ...
// 6. after big changes, corrections              11.2s    32.8s   14.0
// 7. removed link-segment chasing in check/L/S    8.9s    30.8s   14.0
// 8. avoiding do_lea1 if k is a nonptr            8.0s    28.0s   12.9
//--------------------------------------------------------------

//#include "vg_skin.h"

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_execontext.h"
#include "pub_tool_hashtable.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_options.h"
#include "pub_tool_aspacemgr.h"    // VG_(am_shadow_alloc)
#include "pub_tool_vki.h"          // VKI_MAX_PAGE_SIZE
#include "pub_tool_machine.h"      // VG_({get,set}_shadow_regs_area) et al
#include "pub_tool_debuginfo.h"    // VG_(get_fnname)
#include "pub_tool_threadstate.h"  // VG_(get_running_tid)
#include "pub_tool_oset.h"
#include "pub_tool_vkiscnums.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"

#include "pc_common.h"

//#include "h_list.h"
#include "h_main.h"

#include "sg_main.h"   // sg_instrument_*, and struct _SGEnv


/*------------------------------------------------------------*/
/*--- Debug/trace options                                  ---*/
/*------------------------------------------------------------*/

/* Set to 1 to do sanity checks on Seg values in many places, which
   checks if bogus Segs are in circulation.  Quite expensive from a
   performance point of view. */
#define SC_SEGS 0

static ULong stats__client_mallocs = 0;
static ULong stats__client_frees   = 0;
static ULong stats__segs_allocd    = 0;
static ULong stats__segs_recycled  = 0;


//////////////////////////////////////////////////////////////
//                                                          //
// Segments low level storage                               //
//                                                          //
//////////////////////////////////////////////////////////////

// NONPTR, UNKNOWN, BOTTOM defined in h_main.h since
// pc_common.c needs to see them, for error processing

// we only start recycling segs when this many exist
#define N_FREED_SEGS (1 * 1000 * 1000)

struct _Seg {
   Addr  addr;
   SizeT szB; /* may be zero */
   ExeContext* ec;  /* where malloc'd or freed */
   /* When (Seg*)1, the block is in use.  Otherwise, this forms a
      linked list of freed blocks, running from the oldest freed block
      to the most recently freed block. */
   struct _Seg* nextfree;
};

// Determines if 'a' is before, within, or after seg's range.  Sets 'cmp' to
// -1/0/1 accordingly.  Sets 'n' to the number of bytes before/within/after.
void Seg__cmp(Seg* seg, Addr a, Int* cmp, UWord* n)
{
   if (a < seg->addr) {
      *cmp = -1;
      *n   = seg->addr - a;
   } else if (a < seg->addr + seg->szB && seg->szB > 0) {
      *cmp = 0;
      *n = a - seg->addr;
   } else {
      *cmp = 1;
      *n = a - (seg->addr + seg->szB);
   }
}

inline Bool Seg__is_freed(Seg* seg)
{
   if (!is_known_segment(seg))
      return False;
   else
      return seg->nextfree != (Seg*)1;
}

ExeContext* Seg__where(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->ec;
}

SizeT Seg__size(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->szB;
}

Addr Seg__addr(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->addr;
}


#define N_SEGS_PER_GROUP 10000

typedef
   struct _SegGroup {
      struct _SegGroup* admin;
      UWord nextfree; /* 0 .. N_SEGS_PER_GROUP */
      Seg segs[N_SEGS_PER_GROUP];
   }
   SegGroup;

static SegGroup* group_list = NULL;
static UWord     nFreeSegs = 0;
static Seg*      freesegs_youngest = NULL;
static Seg*      freesegs_oldest = NULL;


static SegGroup* new_SegGroup ( void ) {
   SegGroup* g = VG_(malloc)("pc.h_main.nTG.1", sizeof(SegGroup));
   VG_(memset)(g, 0, sizeof(*g));
   return g;
}

/* Get a completely new Seg */
static Seg* new_Seg ( void )
{
   Seg*      teg;
   SegGroup* g;
   if (group_list == NULL) {
      g = new_SegGroup();
      g->admin = NULL;
      group_list = g;
   }
   tl_assert(group_list->nextfree <= N_SEGS_PER_GROUP);
   if (group_list->nextfree == N_SEGS_PER_GROUP) {
      g = new_SegGroup();
      g->admin = group_list;
      group_list = g;
   }
   tl_assert(group_list->nextfree < N_SEGS_PER_GROUP);
   teg = &group_list->segs[ group_list->nextfree ];
   group_list->nextfree++;
   stats__segs_allocd++;
   return teg;
}

static Seg* get_Seg_for_malloc ( void )
{
   Seg* seg;
   if (nFreeSegs < N_FREED_SEGS) {
      seg = new_Seg();
      seg->nextfree = (Seg*)1;
      return seg;
   }
   /* else recycle the oldest Seg in the free list */
   tl_assert(freesegs_youngest);
   tl_assert(freesegs_oldest);
   tl_assert(freesegs_youngest != freesegs_oldest);
   seg = freesegs_oldest;
   freesegs_oldest = seg->nextfree;
   nFreeSegs--;
   seg->nextfree = (Seg*)1;
   stats__segs_recycled++;
   return seg;
}

static void set_Seg_freed ( Seg* seg )
{
   tl_assert(seg);
   tl_assert(!Seg__is_freed(seg));
   if (nFreeSegs == 0) {
      tl_assert(freesegs_oldest == NULL);
      tl_assert(freesegs_youngest == NULL);
      seg->nextfree = NULL;
      freesegs_youngest = seg;
      freesegs_oldest = seg;
      nFreeSegs++;
   } else {
      tl_assert(freesegs_youngest);
      tl_assert(freesegs_oldest);
      if (nFreeSegs == 1) {
         tl_assert(freesegs_youngest == freesegs_oldest);
      } else {
         tl_assert(freesegs_youngest != freesegs_oldest);
      }
      tl_assert(freesegs_youngest->nextfree == NULL);
      tl_assert(seg != freesegs_youngest && seg != freesegs_oldest);
      seg->nextfree = NULL;
      freesegs_youngest->nextfree = seg;
      freesegs_youngest = seg;
      nFreeSegs++;
   }
}

static WordFM* addr_to_seg_map = NULL; /* GuestAddr -> Seg* */

static void addr_to_seg_map_ENSURE_INIT ( void )
{
   if (UNLIKELY(addr_to_seg_map == NULL)) {
      addr_to_seg_map = VG_(newFM)( VG_(malloc), "pc.h_main.attmEI.1",
                                    VG_(free), NULL/*unboxedcmp*/ );
   }
}

static Seg* find_Seg_by_addr ( Addr ga )
{
   UWord keyW, valW;
   addr_to_seg_map_ENSURE_INIT();
   if (VG_(lookupFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga )) {
      tl_assert(keyW == ga);
      return (Seg*)valW;
   } else {
      return NULL;
   }
}

static void bind_addr_to_Seg ( Addr ga, Seg* seg )
{
   Bool b;
   addr_to_seg_map_ENSURE_INIT();
   b = VG_(addToFM)( addr_to_seg_map, (UWord)ga, (UWord)seg );
   tl_assert(!b); /* else ga is already bound */
}

static void unbind_addr_from_Seg ( Addr ga )
{
   Bool b;
   UWord keyW, valW;
   addr_to_seg_map_ENSURE_INIT();
   b = VG_(delFromFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga );
   tl_assert(b); /* else ga was not bound */
   tl_assert(keyW == ga);
   tl_assert(valW != 0);
}


//////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////

// So that post_reg_write_clientcall knows the segment just allocated.
static Seg* last_seg_added = NULL;

// Returns the added heap segment
static Seg* add_new_segment ( ThreadId tid, Addr p, SizeT size )
{
   Seg* seg = get_Seg_for_malloc();
   tl_assert(seg != (Seg*)1); /* since we're using 1 as a special value */
   seg->addr = p;
   seg->szB  = size;
   seg->ec   = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   tl_assert(!Seg__is_freed(seg));

   bind_addr_to_Seg(p, seg);

   last_seg_added = seg;

   return seg;
}

// Forward declarations
static void copy_mem( Addr from, Addr to, SizeT len );
static void set_mem_unknown ( Addr a, SizeT len );

static inline VG_REGPARM(1) Seg* nonptr_or_unknown(UWord x); /*fwds*/

static
void* alloc_and_new_mem_heap ( ThreadId tid,
                               SizeT size, SizeT alignment, Bool is_zeroed )
{
   Addr p;

   if ( ((SSizeT)size) < 0) return NULL;

   p = (Addr)VG_(cli_malloc)(alignment, size);
   if (is_zeroed) VG_(memset)((void*)p, 0, size);

   set_mem_unknown( p, size );
   add_new_segment( tid, p, size );

   stats__client_mallocs++;
   return (void*)p;
}

static void die_and_free_mem_heap ( ThreadId tid, Seg* seg )
{
   // Empty and free the actual block
   tl_assert(!Seg__is_freed(seg));
   set_mem_unknown( seg->addr, seg->szB );

   VG_(cli_free)( (void*)seg->addr );

   // Remember where freed
   seg->ec = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );

   set_Seg_freed(seg);
   unbind_addr_from_Seg( seg->addr );

   stats__client_frees++;
}

static void handle_free_heap( ThreadId tid, void* p )
{
   Seg* seg = find_Seg_by_addr( (Addr)p );
   if (!seg) {
      /* freeing a block that wasn't malloc'd.  Ignore. */
      return;
   }
   die_and_free_mem_heap( tid, seg );
}


/*------------------------------------------------------------*/
/*--- Shadow memory                                        ---*/
/*------------------------------------------------------------*/

/* Shadow memory holds one Seg for each naturally aligned (guest)
   word.  For a 32 bit target (assuming host word size == guest word
   size) that means one Seg per 4 bytes, and each Seg occupies 4
   bytes.  For a 64 bit target that means one Seg per 8 bytes, and
   each Seg occupies 8 bytes.  Hence in each case the overall space
   overhead for shadow memory is 1:1.

   This does however make it a bit tricky to size SecMap.vseg[], since
   it needs to hold 16384 entries for 32 bit targets but only 8192
   entries for 64 bit targets. */

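/* Worked example (64-bit target, derived from the macros below): for
   an access to address A, the low 3 bits are dropped by the natural
   alignment requirement; bits 3..15 pick one of the 8192 vseg[]
   entries in a SecMap, i.e. index = (A & 0xFFF8) >> 3; and the
   remaining high bits (A & ~0xFFFF) select the SecMap itself via the
   primary map. */
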
#if 0
__attribute__((unused))
static void pp_curr_ExeContext(void)
{
   VG_(pp_ExeContext)(
      VG_(get_ExeContext)(
         VG_(get_current_or_recent_tid)() ) );
   VG_(message)(Vg_UserMsg, "");
}
#endif

#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
#  define SHMEM_SECMAP_MASK         0xFFFC
#  define SHMEM_SECMAP_SHIFT        2
#  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_4_ALIGNED(_a)
#  define SEC_MAP_WORDS             (0x10000UL / 4UL) /* 16k */
#elif defined(VGA_amd64) || defined(VGA_ppc64)
#  define SHMEM_SECMAP_MASK         0xFFF8
#  define SHMEM_SECMAP_SHIFT        3
#  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_8_ALIGNED(_a)
#  define SEC_MAP_WORDS             (0x10000UL / 8UL) /* 8k */
#else
#  error "Unknown arch"
#endif

typedef
   struct {
      Seg* vseg[SEC_MAP_WORDS];
   }
   SecMap;

static SecMap  distinguished_secondary_map;

/* An entry in the primary map.  base must be a 64k-aligned value, and
   sm points at the relevant secondary map.  The secondary may be
   either a real secondary, or the distinguished secondary.  DO NOT
   CHANGE THIS LAYOUT: the first word has to be the key for OSet fast
   lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   PriMapEnt;

/* Primary map is an OSet of PriMapEnt (primap_L2), "fronted" by a
   cache (primap_L1). */

/* Tunable parameter: How big is the L1 queue? */
#define N_PRIMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define PRIMAP_L1_INSERT_IX 12

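/* Design note (derived from maybe_find_in_primap below): the L1 cache
   is a self-organising list.  A hit at position i swaps the entry one
   place towards the front; entries brought in from an L2 lookup are
   inserted mid-queue, at position PRIMAP_L1_INSERT_IX, so that one-off
   lookups cannot flush the whole cache. */
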
static struct {
          Addr       base; // must be 64k aligned
          PriMapEnt* ent; // pointer to the matching primap_L2 node
       }
       primap_L1[N_PRIMAP_L1];

static OSet* primap_L2 = NULL;


/* # searches initiated in primap_L1, and # base cmps required */
static ULong n_primap_L1_searches  = 0;
static ULong n_primap_L1_cmps      = 0;
/* # of searches that missed in primap_L1 and therefore had to
   be handed to primap_L2. And the number of nodes inserted. */
static ULong n_primap_L2_searches  = 0;
static ULong n_primap_L2_nodes     = 0;


static void init_shadow_memory ( void )
{
   Int i;

   for (i = 0; i < SEC_MAP_WORDS; i++)
      distinguished_secondary_map.vseg[i] = NONPTR;

   for (i = 0; i < N_PRIMAP_L1; i++) {
      primap_L1[i].base = 1; /* not 64k aligned, so doesn't match any
                                request ==> slot is empty */
      primap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(PriMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   primap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(PriMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "pc.h_main.ism.1",
                                    VG_(free) );
   tl_assert(primap_L2);
}

static void insert_into_primap_L1_at ( Word rank, PriMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_PRIMAP_L1);
   for (i = N_PRIMAP_L1-1; i > rank; i--)
      primap_L1[i] = primap_L1[i-1];
   primap_L1[rank].base = ent->base;
   primap_L1[rank].ent  = ent;
}

static inline PriMapEnt* maybe_find_in_primap ( Addr a )
{
   PriMapEnt  key;
   PriMapEnt* res;
   Word       i;

   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(primap_L1[0].base == a))
      return primap_L1[0].ent;
   if (LIKELY(primap_L1[1].base == a)) {
      Addr       t_base = primap_L1[0].base;
      PriMapEnt* t_ent  = primap_L1[0].ent;
      primap_L1[0].base = primap_L1[1].base;
      primap_L1[0].ent  = primap_L1[1].ent;
      primap_L1[1].base = t_base;
      primap_L1[1].ent  = t_ent;
      return primap_L1[0].ent;
   }

   n_primap_L1_searches++;

   for (i = 0; i < N_PRIMAP_L1; i++) {
      if (primap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_PRIMAP_L1);

   n_primap_L1_cmps += (ULong)(i+1);

   if (i < N_PRIMAP_L1) {
      if (i > 0) {
         Addr       t_base = primap_L1[i-1].base;
         PriMapEnt* t_ent  = primap_L1[i-1].ent;
         primap_L1[i-1].base = primap_L1[i-0].base;
         primap_L1[i-1].ent  = primap_L1[i-0].ent;
         primap_L1[i-0].base = t_base;
         primap_L1[i-0].ent  = t_ent;
         i--;
      }
      return primap_L1[i].ent;
   }

   n_primap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(primap_L2, &key);
   if (res)
      insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, res );
   return res;
}

static SecMap* alloc_secondary_map ( void )
{
   SecMap* map;
   UInt  i;

   // JRS 2008-June-25: what's the following assertion for?
   tl_assert(0 == (sizeof(SecMap) % VKI_MAX_PAGE_SIZE));

   map = VG_(am_shadow_alloc)( sizeof(SecMap) );
   if (map == NULL)
      VG_(out_of_memory_NORETURN)( "annelid:allocate new SecMap",
                                   sizeof(SecMap) );

   for (i = 0; i < SEC_MAP_WORDS; i++)
      map->vseg[i] = NONPTR;
   if (0) VG_(printf)("XXX new secmap %p\n", map);
   return map;
}

static PriMapEnt* find_or_alloc_in_primap ( Addr a )
{
   PriMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_primap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the primary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (PriMapEnt*) VG_(OSetGen_AllocNode)(
                         primap_L2, sizeof(PriMapEnt) );
   tl_assert(nyu);
   nyu->base = a;
   nyu->sm   = alloc_secondary_map();
   tl_assert(nyu->sm);
   VG_(OSetGen_Insert)( primap_L2, nyu );
   insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, nyu );
   n_primap_L2_nodes++;
   return nyu;
}

/////////////////////////////////////////////////

// Nb: 'a' must be naturally word aligned for the host.
static inline Seg* get_mem_vseg ( Addr a )
{
   SecMap* sm     = find_or_alloc_in_primap(a)->sm;
   UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
   tl_assert(SHMEM_IS_WORD_ALIGNED(a));
   return sm->vseg[sm_off];
}

// Nb: 'a' must be naturally word aligned for the host.
static inline void set_mem_vseg ( Addr a, Seg* vseg )
{
   SecMap* sm     = find_or_alloc_in_primap(a)->sm;
   UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
   tl_assert(SHMEM_IS_WORD_ALIGNED(a));
   sm->vseg[sm_off] = vseg;
}

// Find the Seg which contains the given address.
// Returns UNKNOWN if no matches.  Never returns BOTTOM or NONPTR.
// Also, only returns in-use segments, not freed ones.
/* Doing this fast is distinctly difficult when there are more than a
   few heap allocated blocks live.  Basically it is done by searching
   addr_to_seg_map for 'a'.

   First, if 'a' is the start address of a segment, then we can detect
   that by simply doing a VG_(lookupFM) of 'a', and we are done (nice
   and easy).

   If 'a' is within some segment, but does not point to the start, it
   is much more complex.  We use VG_(findBoundsFM) to find the segment
   with the largest .addr field which is <= a, and we then inspect the
   segment to see if 'a' really falls inside it or not.  This is all a
   bit complex and fragile, and so there's a lot of assertery in the
   code below.  It has been crosschecked however against the trivial
   _SLOW implementation shown after the end of this fn.
*/
static Seg* get_Seg_containing_addr( Addr a )
{
   UWord keyW, valW;
   Seg*  s2;

   /* Since we are going to poke around in it */
   addr_to_seg_map_ENSURE_INIT();

   /* first, see if 'a' is at the start of a block.  We do this both
      because it's easy and more importantly because VG_(findBoundsFM)
      will fail in this case, so we need to exclude it first. */
   if (VG_(lookupFM)( addr_to_seg_map, &keyW, &valW, a )) {
      tl_assert(keyW == a);
      s2 = (Seg*)valW;
      tl_assert(s2->addr == a);
   } else {
      Bool  ok;
      UWord kMin, vMin, kMax, vMax;
      Seg   minSeg;
      Seg   maxSeg;
      UWord minAddr = 0;
      UWord maxAddr = ~minAddr;
      VG_(memset)(&minSeg, 0, sizeof(minSeg));
      VG_(memset)(&maxSeg, 0, sizeof(maxSeg));
      minSeg.addr = minAddr;
      maxSeg.addr = maxAddr;
      ok = VG_(findBoundsFM)( addr_to_seg_map,
                              &kMin, &vMin, &kMax, &vMax,
                              minAddr, (UWord)&minSeg,
                              maxAddr, (UWord)&maxSeg, a );
      tl_assert(ok); /* must be so, since False is only returned when
                        'a' is directly present in the map, and we
                        just established that it isn't. */
      /* At this point, either vMin points at minSeg, or it points at a
         real Seg.  In the former case, there is no live heap-allocated
         Seg which has a start address <= a, so a is not in any block.
         In the latter case, the Seg vMin points at may or may not
         actually contain 'a'; we can only tell that by inspecting the
         Seg itself. */
      s2 = (Seg*)vMin;
      tl_assert(kMin == s2->addr);
      if (s2 == &minSeg) {
         /* the former */
         s2 = UNKNOWN;
      } else {
         /* the latter */
         tl_assert(s2->addr <= a);
         /* if s2 doesn't actually contain 'a', we must forget about it.
            (Containment means s2->addr <= a < s2->addr + s2->szB,
            hence '<=' below, matching the _SLOW implementation.) */
         if (s2->szB == 0 /* a zero sized block can't contain anything */
             || s2->addr + s2->szB <= a /* the usual range check */)
            s2 = UNKNOWN;
      }
      /* while we're at it, do as much assertery as we can, since this
         is all rather complex.  Either vMax points at maxSeg, or it
         points to a real block, which must have a start address
         greater than a. */
      tl_assert(kMax == ((Seg*)vMax)->addr);
      if (vMax == (UWord)&maxSeg) {
         /* nothing we can check */
      } else {
         tl_assert(a < kMax); /* hence also a < ((Seg*)vMax)->addr */
      }
   }

   return s2;
}

/* XXXX very slow reference implementation.  Do not use.
static Seg* get_Seg_containing_addr_SLOW( Addr a )
{
   SegGroup* group;
   UWord i;
   stats__slow_searches++;
   for (group = group_list; group; group = group->admin) {
      for (i = 0; i < group->nextfree; i++) {
         stats__slow_totcmps++;
         if (Seg__is_freed(&group->segs[i]))
            continue;
         if (group->segs[i].addr <= a
             && a < group->segs[i].addr + group->segs[i].szB)
            return &group->segs[i];
      }
   }
   return UNKNOWN;
}
*/


/*------------------------------------------------------------*/
/*--- malloc() et al replacements                          ---*/
/*------------------------------------------------------------*/

void* h_replace_malloc ( ThreadId tid, SizeT n )
{
   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
                                        /*is_zeroed*/False );
}

void* h_replace___builtin_new ( ThreadId tid, SizeT n )
{
   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
                                           /*is_zeroed*/False );
}

void* h_replace___builtin_vec_new ( ThreadId tid, SizeT n )
{
   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
                                           /*is_zeroed*/False );
}

void* h_replace_memalign ( ThreadId tid, SizeT align, SizeT n )
{
   return alloc_and_new_mem_heap ( tid, n, align,
                                        /*is_zeroed*/False );
}

void* h_replace_calloc ( ThreadId tid, SizeT nmemb, SizeT size1 )
{
   return alloc_and_new_mem_heap ( tid, nmemb*size1, VG_(clo_alignment),
                                        /*is_zeroed*/True );
}

void h_replace_free ( ThreadId tid, void* p )
{
   // Should arguably check here if p.vseg matches the segID of the
   // pointed-to block... unfortunately, by this stage, we don't know what
   // p.vseg is, because we don't know the address of p (the p here is a
   // copy, and we've lost the address of its source).  To do so would
   // require passing &p in, which would require rewriting part of
   // vg_replace_malloc.c... argh.
   //
   // However, Memcheck does free checking, and will catch almost all
   // violations this checking would have caught.  (It would only miss if
   // we unluckily passed an unrelated pointer that happened to point to
   // the very start of some heap block.  This is very unlikely!)  So we
   // haven't lost much.

   handle_free_heap(tid, p);
}

void h_replace___builtin_delete ( ThreadId tid, void* p )
{
   handle_free_heap(tid, p);
}

void h_replace___builtin_vec_delete ( ThreadId tid, void* p )
{
   handle_free_heap(tid, p);
}

void* h_replace_realloc ( ThreadId tid, void* p_old, SizeT new_size )
{
   Seg* seg;

   /* First try and find the block. */
   seg = find_Seg_by_addr( (Addr)p_old );
   if (!seg)
      return NULL;

   tl_assert(seg->addr == (Addr)p_old);

   if (new_size <= seg->szB) {
      /* new size is smaller: allocate, copy from old to new */
      Addr p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size);
      VG_(memcpy)((void*)p_new, p_old, new_size);

      /* Notification: copy retained part */
      copy_mem       ( (Addr)p_old, p_new, new_size );

      /* Free old memory */
      die_and_free_mem_heap( tid, seg );

      /* This has to be after die_and_free_mem_heap, otherwise the
         former succeeds in shorting out the new block, not the
         old, in the case when both are on the same list.  */
      add_new_segment ( tid, p_new, new_size );

      return (void*)p_new;
   } else {
      /* new size is bigger: allocate, copy from old to new */
      Addr p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size);
      VG_(memcpy)((void*)p_new, p_old, seg->szB);

      /* Notification: first part kept and copied, remainder new */
      copy_mem       ( (Addr)p_old, p_new, seg->szB );
      set_mem_unknown( p_new + seg->szB, new_size - seg->szB );

      /* Free old memory */
      die_and_free_mem_heap( tid, seg );

      /* This has to be after die_and_free_mem_heap, otherwise the
         former succeeds in shorting out the new block, not the old,
         in the case when both are on the same list.  NB jrs
         2008-Sept-11: not sure if this comment is valid/correct any
         more -- I suspect not. */
      add_new_segment ( tid, p_new, new_size );

      return (void*)p_new;
   }
}

SizeT h_replace_malloc_usable_size ( ThreadId tid, void* p )
{
   Seg* seg = find_Seg_by_addr( (Addr)p );

   // There may be slop, but pretend there isn't because only the asked-for
   // area will have been shadowed properly.
   return ( seg ? seg->szB : 0 );
}


/*------------------------------------------------------------*/
/*--- Memory events                                        ---*/
/*------------------------------------------------------------*/

static inline
void set_mem ( Addr a, SizeT len, Seg* seg )
{
   Addr end;

   if (0 == len)
      return;

   if (len > 100 * 1000 * 1000)
      VG_(message)(Vg_UserMsg,
                   "Warning: set address range state: large range %lu\n",
                   len);

   a   = VG_ROUNDDN(a,       sizeof(UWord));
   end = VG_ROUNDUP(a + len, sizeof(UWord));
   for ( ; a < end; a += sizeof(UWord))
      set_mem_vseg(a, seg);
}

static void set_mem_unknown( Addr a, SizeT len )
{
   set_mem( a, len, UNKNOWN );
}

//zz static void set_mem_nonptr( Addr a, UInt len )
//zz {
//zz    set_mem( a, len, NONPTR );
//zz }

void h_new_mem_startup( Addr a, SizeT len,
                        Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (0) VG_(printf)("new_mem_startup(%#lx,%lu)\n", a, len);
   set_mem_unknown( a, len );
   //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
}

//zz // XXX: Currently not doing anything with brk() -- new segments, or not?
//zz // Proper way to do it would be to grow/shrink a single, special brk segment.
//zz //
//zz // brk is difficult: it defines a single segment, of changeable size.
//zz // It starts off with size zero, at the address given by brk(0).  There are
//zz // no pointers within the program to it.  Any subsequent calls by the
//zz // program to brk() (possibly growing or shrinking it) return pointers to
//zz // the *end* of the segment (nb: this is the kernel brk(), which is
//zz // different to the libc brk()).
//zz //
//zz // If fixing this, don't forget to update the brk case in SK_(post_syscall).
//zz //
//zz // Nb: not sure if the return value is the last byte addressable, or one
//zz // past the end of the segment.
//zz //
//zz static void new_mem_brk( Addr a, UInt len )
//zz {
//zz    set_mem_unknown(a, len);
//zz    //VG_(skin_panic)("can't handle new_mem_brk");
//zz }

// Not quite right:  if you mmap a segment into a specified place, it could
// be legitimate to do certain arithmetic with the pointer that it wouldn't
// otherwise.  Hopefully this is rare, though.
void h_new_mem_mmap( Addr a, SizeT len,
                     Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (0) VG_(printf)("new_mem_mmap(%#lx,%lu)\n", a, len);
//zz #if 0
//zz    Seg seg = NULL;
//zz
//zz    // Check for overlapping segments
//zz #if 0
//zz    is_overlapping_seg___a   = a;    // 'free' variable
//zz    is_overlapping_seg___len = len;  // 'free' variable
//zz    seg = (Seg)VG_(HT_first_match) ( mlist, is_overlapping_seg );
//zz    is_overlapping_seg___a   = 0;    // paranoia, reset
//zz    is_overlapping_seg___len = 0;    // paranoia, reset
//zz #endif
//zz
//zz    // XXX: do this check properly with ISLists
//zz
//zz    if ( ISList__findI( seglist, a, &seg )) {
//zz       sk_assert(SegMmap == seg->status || SegMmapFree == seg->status);
//zz       if (SegMmap == seg->status)
//zz
//zz    }
//zz
//zz    if (NULL != seg) {
//zz       // Right, we found an overlap
//zz       if (VG_(clo_verbosity) > 1)
//zz          VG_(message)(Vg_UserMsg, "mmap overlap:  old: %#lx, %d;  new: %#lx, %d",
//zz                                   seg->left, Seg__size(seg), a, len);
//zz       if (seg->left <= a && a <= seg->right) {
//zz          // New one truncates end of the old one.  Nb: we don't adjust its
//zz          // size, because the first segment's pointer can be (and for
//zz          // Konqueror, is) legitimately used to access parts of the second
//zz          // segment.  At least, I assume Konqueror is doing something legal.
//zz          // so that a size mismatch upon munmap isn't a problem.
//zz //         seg->size = a - seg->data;
//zz //         seg->is_truncated_map = True;
//zz //         if (VG_(clo_verbosity) > 1)
//zz //            VG_(message)(Vg_UserMsg, "old seg truncated to length %d",
//zz //                                     seg->size);
//zz       } else {
//zz          VG_(skin_panic)("Can't handle this mmap() overlap case");
//zz       }
//zz    }
   set_mem_unknown( a, len );
   //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
//zz #endif
}

static void copy_mem( Addr from, Addr to, SizeT len )
{
   Addr fromend = from + len;

   // Must be aligned due to malloc always returning aligned objects.
   tl_assert(VG_IS_8_ALIGNED(from) && VG_IS_8_ALIGNED(to));

   // Nothing to do for a zero-length copy.
   if (0 == len)
      return;

   for ( ; from < fromend; from += sizeof(UWord), to += sizeof(UWord))
      set_mem_vseg( to, get_mem_vseg(from) );
}

//zz // Similar to SK_(realloc)()
//zz static void copy_mem_remap( Addr from, Addr to, UInt len )
//zz {
//zz    VG_(skin_panic)("argh: copy_mem_remap");
//zz }
//zz
//zz static void die_mem_brk( Addr a, UInt len )
//zz {
//zz    set_mem_unknown(a, len);
//zz //   VG_(skin_panic)("can't handle die_mem_brk()");
//zz }

void h_die_mem_munmap( Addr a, SizeT len )
{
//   handle_free_munmap( (void*)a, len );
}

// Don't need to check all addresses within the block; in the absence of
// discontiguous segments, the segments for the first and last bytes should
// be the same.  Can't easily check the pointer segment matches the block
// segment, unfortunately, but the first/last check should catch most
// errors.
static void pre_mem_access2 ( CorePart part, ThreadId tid, Char* str,
                              Addr s/*tart*/, Addr e/*nd*/ )
{
   Seg  *seglo, *seghi;

   // Don't check code being translated -- very slow, and not much point
   if (Vg_CoreTranslate == part) return;

   // Don't check the signal case -- only happens in core, no need to check
   if (Vg_CoreSignal == part) return;

   // Only expect syscalls after this point
   if (part != Vg_CoreSysCall) {
      VG_(printf)("part = %d\n", part);
      VG_(tool_panic)("unknown corepart in pre_mem_access2");
   }

   // Check first and last bytes match
   seglo = get_Seg_containing_addr( s );
   seghi = get_Seg_containing_addr( e );
   tl_assert( BOTTOM != seglo && NONPTR != seglo );
   tl_assert( BOTTOM != seghi && NONPTR != seghi );

   /* record an error if start and end are in different, but known segments */
   if (is_known_segment(seglo) && is_known_segment(seghi)
       && seglo != seghi) {
      h_record_sysparam_error(tid, part, str, s, e, seglo, seghi);
   }
   else
   /* record an error if start is in a known segment but end isn't */
   if (is_known_segment(seglo) && !is_known_segment(seghi)) {
      h_record_sysparam_error(tid, part, str, s, e, seglo, UNKNOWN);
   }
   else
   /* record an error if end is in a known segment but start isn't */
   if (!is_known_segment(seglo) && is_known_segment(seghi)) {
      h_record_sysparam_error(tid, part, str, s, e, UNKNOWN, seghi);
   }
}

void h_pre_mem_access ( CorePart part, ThreadId tid, Char* s,
                        Addr base, SizeT size )
{
   pre_mem_access2( part, tid, s, base, base + size - 1 );
}

void h_pre_mem_read_asciiz ( CorePart part, ThreadId tid,
                             Char* s, Addr lo )
{
   Addr hi = lo;

   // Nb: the '\0' must be included in the lo...hi range
   while ('\0' != *(Char*)hi) hi++;
   pre_mem_access2( part, tid, s, lo, hi );
}

//zz static void post_mem_write(Addr a, UInt len)
//zz {
//zz    set_mem_unknown(a, len);
//zz }


/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

//zz static void post_regs_write_init ( void )
//zz {
//zz    UInt i;
//zz    for (i = R_EAX; i <= R_EDI; i++)
//zz       VG_(set_shadow_archreg)( i, (UInt)UNKNOWN );
//zz
//zz    // Don't bother about eflags
//zz }

// BEGIN move this ugliness to pc_machine.c

static inline Bool host_is_big_endian ( void ) {
   UInt x = 0x11223344;
   return 0x1122 == *(UShort*)(&x);
}
static inline Bool host_is_little_endian ( void ) {
   UInt x = 0x11223344;
   return 0x3344 == *(UShort*)(&x);
}

#define N_INTREGINFO_OFFSETS 4

/* Holds the result of a query to 'get_IntRegInfo'.  Valid values for
   n_offsets are:

   -1: means the queried guest state slice exactly matches
       one integer register

   0: means the queried guest state slice does not overlap any
      integer registers

   1 .. N_INTREGINFO_OFFSETS: means the queried guest state offset
      overlaps n_offsets different integer registers, and their base
      offsets are placed in the offsets array.
*/
typedef
   struct {
      Int offsets[N_INTREGINFO_OFFSETS];
      Int n_offsets;
   }
   IntRegInfo;

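/* Example (see get_IntRegInfo below): on x86, a query for the 4-byte
   slice at the offset of guest_EAX yields n_offsets == -1 (exactly one
   integer register), whereas a query lying wholly within an XMM
   register yields n_offsets == 0 (no integer registers overlapped). */
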

#if defined(VGA_x86)
# include "libvex_guest_x86.h"
# define PC_SIZEOF_GUEST_STATE sizeof(VexGuestX86State)
#endif

#if defined(VGA_amd64)
# include "libvex_guest_amd64.h"
# define PC_SIZEOF_GUEST_STATE sizeof(VexGuestAMD64State)
# define PC_OFF_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO)
# define PC_SZB_FS_ZERO sizeof( ((VexGuestAMD64State*)0)->guest_FS_ZERO)
#endif

#if defined(VGA_ppc32)
# include "libvex_guest_ppc32.h"
# define PC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC32State)
#endif

#if defined(VGA_ppc64)
# include "libvex_guest_ppc64.h"
# define PC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC64State)
#endif

#if defined(VGA_arm)
# include "libvex_guest_arm.h"
# define PC_SIZEOF_GUEST_STATE sizeof(VexGuestARMState)
#endif


/* See description on definition of type IntRegInfo. */
static void get_IntRegInfo ( /*OUT*/IntRegInfo* iii, Int offset, Int szB )
{
   /* --------------------- x86 --------------------- */

#  if defined(VGA_x86)

#  define GOF(_fieldname) \
      (offsetof(VexGuestX86State,guest_##_fieldname))

   Int  o    = offset;
   Int  sz   = szB;
   Bool is4  = sz == 4;
   Bool is21 = sz == 2 || sz == 1;

   tl_assert(sz > 0);
   tl_assert(host_is_little_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(EAX)     && is4) goto exactly1;
   if (o == GOF(ECX)     && is4) goto exactly1;
   if (o == GOF(EDX)     && is4) goto exactly1;
   if (o == GOF(EBX)     && is4) goto exactly1;
   if (o == GOF(ESP)     && is4) goto exactly1;
   if (o == GOF(EBP)     && is4) goto exactly1;
   if (o == GOF(ESI)     && is4) goto exactly1;
   if (o == GOF(EDI)     && is4) goto exactly1;
   if (o == GOF(EIP)     && is4) goto none;
   if (o == GOF(IP_AT_SYSCALL) && is4) goto none;
   if (o == GOF(CC_OP)   && is4) goto none;
   if (o == GOF(CC_DEP1) && is4) goto none;
   if (o == GOF(CC_DEP2) && is4) goto none;
   if (o == GOF(CC_NDEP) && is4) goto none;
   if (o == GOF(DFLAG)   && is4) goto none;
   if (o == GOF(IDFLAG)  && is4) goto none;
   if (o == GOF(ACFLAG)  && is4) goto none;

   /* Partial accesses to integer registers */
   if (o == GOF(EAX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EAX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(ECX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(ECX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(EBX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EBX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(EDX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EDX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(ESI)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EDI)     && is21) {         o -= 0; goto contains_o; }

   /* Segment related guff */
   if (o == GOF(GS)  && sz == 2) goto none;
   if (o == GOF(LDT) && is4) goto none;
   if (o == GOF(GDT) && is4) goto none;

   /* FP admin related */
   if (o == GOF(SSEROUND) && is4) goto none;
   if (o == GOF(FPROUND)  && is4) goto none;
   if (o == GOF(EMWARN)   && is4) goto none;
   if (o == GOF(FTOP)     && is4) goto none;
   if (o == GOF(FPTAG)    && sz == 8) goto none;
   if (o == GOF(FC3210)   && is4) goto none;

   /* xmm registers, including arbitrary sub-parts */
   if (o >= GOF(XMM0) && o+sz <= GOF(XMM0)+16) goto none;
   if (o >= GOF(XMM1) && o+sz <= GOF(XMM1)+16) goto none;
   if (o >= GOF(XMM2) && o+sz <= GOF(XMM2)+16) goto none;
   if (o >= GOF(XMM3) && o+sz <= GOF(XMM3)+16) goto none;
   if (o >= GOF(XMM4) && o+sz <= GOF(XMM4)+16) goto none;
   if (o >= GOF(XMM5) && o+sz <= GOF(XMM5)+16) goto none;
   if (o >= GOF(XMM6) && o+sz <= GOF(XMM6)+16) goto none;
   if (o >= GOF(XMM7) && o+sz <= GOF(XMM7)+16) goto none;

   /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
      to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;

   /* the entire mmx/x87 register bank in one big piece */
   if (o == GOF(FPREG) && sz == 64) goto none;

   VG_(printf)("get_IntRegInfo(x86):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF

   /* -------------------- amd64 -------------------- */

#  elif defined(VGA_amd64)

#  define GOF(_fieldname) \
      (offsetof(VexGuestAMD64State,guest_##_fieldname))

   Int  o     = offset;
   Int  sz    = szB;
   Bool is421 = sz == 4 || sz == 2 || sz == 1;
   Bool is8   = sz == 8;

   tl_assert(sz > 0);
   tl_assert(host_is_little_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(RAX)     && is8) goto exactly1;
   if (o == GOF(RCX)     && is8) goto exactly1;
   if (o == GOF(RDX)     && is8) goto exactly1;
   if (o == GOF(RBX)     && is8) goto exactly1;
   if (o == GOF(RSP)     && is8) goto exactly1;
   if (o == GOF(RBP)     && is8) goto exactly1;
   if (o == GOF(RSI)     && is8) goto exactly1;
   if (o == GOF(RDI)     && is8) goto exactly1;
   if (o == GOF(R8)      && is8) goto exactly1;
   if (o == GOF(R9)      && is8) goto exactly1;
   if (o == GOF(R10)     && is8) goto exactly1;
   if (o == GOF(R11)     && is8) goto exactly1;
   if (o == GOF(R12)     && is8) goto exactly1;
   if (o == GOF(R13)     && is8) goto exactly1;
   if (o == GOF(R14)     && is8) goto exactly1;
   if (o == GOF(R15)     && is8) goto exactly1;
   if (o == GOF(RIP)     && is8) goto exactly1;
   if (o == GOF(IP_AT_SYSCALL) && is8) goto none;
   if (o == GOF(CC_OP)   && is8) goto none;
   if (o == GOF(CC_DEP1) && is8) goto none;
   if (o == GOF(CC_DEP2) && is8) goto none;
   if (o == GOF(CC_NDEP) && is8) goto none;
   if (o == GOF(DFLAG)   && is8) goto none;
   if (o == GOF(IDFLAG)  && is8) goto none;

   /* Partial accesses to integer registers */
   if (o == GOF(RAX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RAX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RCX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RCX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RDX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RDX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RBX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RBX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RBP)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RSI)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RDI)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R8)      && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R9)      && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R10)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R11)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R12)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R13)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R14)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R15)     && is421) {         o -= 0; goto contains_o; }

   /* Segment related guff */
   if (o == GOF(FS_ZERO) && is8) goto exactly1;

   /* FP admin related */
   if (o == GOF(SSEROUND) && is8) goto none;
   if (o == GOF(FPROUND)  && is8) goto none;
   if (o == GOF(EMWARN)   && sz == 4) goto none;
   if (o == GOF(FTOP)     && sz == 4) goto none;
   if (o == GOF(FPTAG)    && is8) goto none;
   if (o == GOF(FC3210)   && is8) goto none;

   /* xmm registers, including arbitrary sub-parts */
   if (o >= GOF(XMM0)  && o+sz <= GOF(XMM0)+16)  goto none;
   if (o >= GOF(XMM1)  && o+sz <= GOF(XMM1)+16)  goto none;
   if (o >= GOF(XMM2)  && o+sz <= GOF(XMM2)+16)  goto none;
   if (o >= GOF(XMM3)  && o+sz <= GOF(XMM3)+16)  goto none;
   if (o >= GOF(XMM4)  && o+sz <= GOF(XMM4)+16)  goto none;
   if (o >= GOF(XMM5)  && o+sz <= GOF(XMM5)+16)  goto none;
   if (o >= GOF(XMM6)  && o+sz <= GOF(XMM6)+16)  goto none;
   if (o >= GOF(XMM7)  && o+sz <= GOF(XMM7)+16)  goto none;
   if (o >= GOF(XMM8)  && o+sz <= GOF(XMM8)+16)  goto none;
   if (o >= GOF(XMM9)  && o+sz <= GOF(XMM9)+16)  goto none;
   if (o >= GOF(XMM10) && o+sz <= GOF(XMM10)+16) goto none;
   if (o >= GOF(XMM11) && o+sz <= GOF(XMM11)+16) goto none;
   if (o >= GOF(XMM12) && o+sz <= GOF(XMM12)+16) goto none;
   if (o >= GOF(XMM13) && o+sz <= GOF(XMM13)+16) goto none;
   if (o >= GOF(XMM14) && o+sz <= GOF(XMM14)+16) goto none;
   if (o >= GOF(XMM15) && o+sz <= GOF(XMM15)+16) goto none;

   /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
      to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;

   VG_(printf)("get_IntRegInfo(amd64):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF

   /* -------------------- ppc32 -------------------- */

#  elif defined(VGA_ppc32)

#  define GOF(_fieldname) \
      (offsetof(VexGuestPPC32State,guest_##_fieldname))

   Int  o    = offset;
   Int  sz   = szB;
   Bool is4  = sz == 4;
   Bool is8  = sz == 8;

   tl_assert(sz > 0);
   tl_assert(host_is_big_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(GPR0)  && is4) goto exactly1;
   if (o == GOF(GPR1)  && is4) goto exactly1;
   if (o == GOF(GPR2)  && is4) goto exactly1;
   if (o == GOF(GPR3)  && is4) goto exactly1;
   if (o == GOF(GPR4)  && is4) goto exactly1;
   if (o == GOF(GPR5)  && is4) goto exactly1;
   if (o == GOF(GPR6)  && is4) goto exactly1;
   if (o == GOF(GPR7)  && is4) goto exactly1;
   if (o == GOF(GPR8)  && is4) goto exactly1;
   if (o == GOF(GPR9)  && is4) goto exactly1;
   if (o == GOF(GPR10) && is4) goto exactly1;
   if (o == GOF(GPR11) && is4) goto exactly1;
   if (o == GOF(GPR12) && is4) goto exactly1;
1520    if (o == GOF(GPR13) && is4) goto exactly1;
1521    if (o == GOF(GPR14) && is4) goto exactly1;
1522    if (o == GOF(GPR15) && is4) goto exactly1;
1523    if (o == GOF(GPR16) && is4) goto exactly1;
1524    if (o == GOF(GPR17) && is4) goto exactly1;
1525    if (o == GOF(GPR18) && is4) goto exactly1;
1526    if (o == GOF(GPR19) && is4) goto exactly1;
1527    if (o == GOF(GPR20) && is4) goto exactly1;
1528    if (o == GOF(GPR21) && is4) goto exactly1;
1529    if (o == GOF(GPR22) && is4) goto exactly1;
1530    if (o == GOF(GPR23) && is4) goto exactly1;
1531    if (o == GOF(GPR24) && is4) goto exactly1;
1532    if (o == GOF(GPR25) && is4) goto exactly1;
1533    if (o == GOF(GPR26) && is4) goto exactly1;
1534    if (o == GOF(GPR27) && is4) goto exactly1;
1535    if (o == GOF(GPR28) && is4) goto exactly1;
1536    if (o == GOF(GPR29) && is4) goto exactly1;
1537    if (o == GOF(GPR30) && is4) goto exactly1;
1538    if (o == GOF(GPR31) && is4) goto exactly1;
1539 
1540    /* Misc integer reg and condition code accesses */
1541    if (o == GOF(LR)        && is4) goto exactly1;
1542    if (o == GOF(CTR)       && is4) goto exactly1;
1543    if (o == GOF(CIA)       && is4) goto none;
1544    if (o == GOF(IP_AT_SYSCALL) && is4) goto none;
1545    if (o == GOF(TISTART)   && is4) goto none;
1546    if (o == GOF(TILEN)     && is4) goto none;
1547    if (o == GOF(REDIR_SP)  && is4) goto none;
1548 
1549    if (sz == 1) {
1550       if (o == GOF(XER_SO))  goto none;
1551       if (o == GOF(XER_OV))  goto none;
1552       if (o == GOF(XER_CA))  goto none;
1553       if (o == GOF(XER_BC))  goto none;
1554       if (o == GOF(CR0_321)) goto none;
1555       if (o == GOF(CR0_0))   goto none;
1556       if (o == GOF(CR1_321)) goto none;
1557       if (o == GOF(CR1_0))   goto none;
1558       if (o == GOF(CR2_321)) goto none;
1559       if (o == GOF(CR2_0))   goto none;
1560       if (o == GOF(CR3_321)) goto none;
1561       if (o == GOF(CR3_0))   goto none;
1562       if (o == GOF(CR4_321)) goto none;
1563       if (o == GOF(CR4_0))   goto none;
1564       if (o == GOF(CR5_321)) goto none;
1565       if (o == GOF(CR5_0))   goto none;
1566       if (o == GOF(CR6_321)) goto none;
1567       if (o == GOF(CR6_0))   goto none;
1568       if (o == GOF(CR7_321)) goto none;
1569       if (o == GOF(CR7_0))   goto none;
1570    }
1571 
1572    /* Exact accesses to FP registers */
1573    if (o == GOF(FPR0)  && is8) goto none;
1574    if (o == GOF(FPR1)  && is8) goto none;
1575    if (o == GOF(FPR2)  && is8) goto none;
1576    if (o == GOF(FPR3)  && is8) goto none;
1577    if (o == GOF(FPR4)  && is8) goto none;
1578    if (o == GOF(FPR5)  && is8) goto none;
1579    if (o == GOF(FPR6)  && is8) goto none;
1580    if (o == GOF(FPR7)  && is8) goto none;
1581    if (o == GOF(FPR8)  && is8) goto none;
1582    if (o == GOF(FPR9)  && is8) goto none;
1583    if (o == GOF(FPR10) && is8) goto none;
1584    if (o == GOF(FPR11) && is8) goto none;
1585    if (o == GOF(FPR12) && is8) goto none;
1586    if (o == GOF(FPR13) && is8) goto none;
1587    if (o == GOF(FPR14) && is8) goto none;
1588    if (o == GOF(FPR15) && is8) goto none;
1589    if (o == GOF(FPR16) && is8) goto none;
1590    if (o == GOF(FPR17) && is8) goto none;
1591    if (o == GOF(FPR18) && is8) goto none;
1592    if (o == GOF(FPR19) && is8) goto none;
1593    if (o == GOF(FPR20) && is8) goto none;
1594    if (o == GOF(FPR21) && is8) goto none;
1595    if (o == GOF(FPR22) && is8) goto none;
1596    if (o == GOF(FPR23) && is8) goto none;
1597    if (o == GOF(FPR24) && is8) goto none;
1598    if (o == GOF(FPR25) && is8) goto none;
1599    if (o == GOF(FPR26) && is8) goto none;
1600    if (o == GOF(FPR27) && is8) goto none;
1601    if (o == GOF(FPR28) && is8) goto none;
1602    if (o == GOF(FPR29) && is8) goto none;
1603    if (o == GOF(FPR30) && is8) goto none;
1604    if (o == GOF(FPR31) && is8) goto none;
1605 
1606    /* FP admin related */
1607    if (o == GOF(FPROUND) && is4) goto none;
1608    if (o == GOF(EMWARN)  && is4) goto none;
1609 
1610    /* Altivec registers */
1611    if (o == GOF(VR0)  && sz == 16) goto none;
1612    if (o == GOF(VR1)  && sz == 16) goto none;
1613    if (o == GOF(VR2)  && sz == 16) goto none;
1614    if (o == GOF(VR3)  && sz == 16) goto none;
1615    if (o == GOF(VR4)  && sz == 16) goto none;
1616    if (o == GOF(VR5)  && sz == 16) goto none;
1617    if (o == GOF(VR6)  && sz == 16) goto none;
1618    if (o == GOF(VR7)  && sz == 16) goto none;
1619    if (o == GOF(VR8)  && sz == 16) goto none;
1620    if (o == GOF(VR9)  && sz == 16) goto none;
1621    if (o == GOF(VR10) && sz == 16) goto none;
1622    if (o == GOF(VR11) && sz == 16) goto none;
1623    if (o == GOF(VR12) && sz == 16) goto none;
1624    if (o == GOF(VR13) && sz == 16) goto none;
1625    if (o == GOF(VR14) && sz == 16) goto none;
1626    if (o == GOF(VR15) && sz == 16) goto none;
1627    if (o == GOF(VR16) && sz == 16) goto none;
1628    if (o == GOF(VR17) && sz == 16) goto none;
1629    if (o == GOF(VR18) && sz == 16) goto none;
1630    if (o == GOF(VR19) && sz == 16) goto none;
1631    if (o == GOF(VR20) && sz == 16) goto none;
1632    if (o == GOF(VR21) && sz == 16) goto none;
1633    if (o == GOF(VR22) && sz == 16) goto none;
1634    if (o == GOF(VR23) && sz == 16) goto none;
1635    if (o == GOF(VR24) && sz == 16) goto none;
1636    if (o == GOF(VR25) && sz == 16) goto none;
1637    if (o == GOF(VR26) && sz == 16) goto none;
1638    if (o == GOF(VR27) && sz == 16) goto none;
1639    if (o == GOF(VR28) && sz == 16) goto none;
1640    if (o == GOF(VR29) && sz == 16) goto none;
1641    if (o == GOF(VR30) && sz == 16) goto none;
1642    if (o == GOF(VR31) && sz == 16) goto none;
1643 
1644    /* Altivec admin related */
1645    if (o == GOF(VRSAVE) && is4) goto none;
1646 
1647    VG_(printf)("get_IntRegInfo(ppc32):failing on (%d,%d)\n", o, sz);
1648    tl_assert(0);
1649 #  undef GOF
1650 
1651    /* -------------------- ppc64 -------------------- */
1652 
1653 #  elif defined(VGA_ppc64)
1654 
1655 #  define GOF(_fieldname) \
1656       (offsetof(VexGuestPPC64State,guest_##_fieldname))
1657 
1658    Int  o    = offset;
1659    Int  sz   = szB;
1660    Bool is4  = sz == 4;
1661    Bool is8  = sz == 8;
1662 
1663    tl_assert(sz > 0);
1664    tl_assert(host_is_big_endian());
1665 
1666    /* Set default state to "does not intersect any int register". */
1667    VG_(memset)( iii, 0, sizeof(*iii) );
1668 
1669    /* Exact accesses to integer registers */
1670    if (o == GOF(GPR0)  && is8) goto exactly1;
1671    if (o == GOF(GPR1)  && is8) goto exactly1;
1672    if (o == GOF(GPR2)  && is8) goto exactly1;
1673    if (o == GOF(GPR3)  && is8) goto exactly1;
1674    if (o == GOF(GPR4)  && is8) goto exactly1;
1675    if (o == GOF(GPR5)  && is8) goto exactly1;
1676    if (o == GOF(GPR6)  && is8) goto exactly1;
1677    if (o == GOF(GPR7)  && is8) goto exactly1;
1678    if (o == GOF(GPR8)  && is8) goto exactly1;
1679    if (o == GOF(GPR9)  && is8) goto exactly1;
1680    if (o == GOF(GPR10) && is8) goto exactly1;
1681    if (o == GOF(GPR11) && is8) goto exactly1;
1682    if (o == GOF(GPR12) && is8) goto exactly1;
1683    if (o == GOF(GPR13) && is8) goto exactly1;
1684    if (o == GOF(GPR14) && is8) goto exactly1;
1685    if (o == GOF(GPR15) && is8) goto exactly1;
1686    if (o == GOF(GPR16) && is8) goto exactly1;
1687    if (o == GOF(GPR17) && is8) goto exactly1;
1688    if (o == GOF(GPR18) && is8) goto exactly1;
1689    if (o == GOF(GPR19) && is8) goto exactly1;
1690    if (o == GOF(GPR20) && is8) goto exactly1;
1691    if (o == GOF(GPR21) && is8) goto exactly1;
1692    if (o == GOF(GPR22) && is8) goto exactly1;
1693    if (o == GOF(GPR23) && is8) goto exactly1;
1694    if (o == GOF(GPR24) && is8) goto exactly1;
1695    if (o == GOF(GPR25) && is8) goto exactly1;
1696    if (o == GOF(GPR26) && is8) goto exactly1;
1697    if (o == GOF(GPR27) && is8) goto exactly1;
1698    if (o == GOF(GPR28) && is8) goto exactly1;
1699    if (o == GOF(GPR29) && is8) goto exactly1;
1700    if (o == GOF(GPR30) && is8) goto exactly1;
1701    if (o == GOF(GPR31) && is8) goto exactly1;
1702 
1703    /* Misc integer reg and condition code accesses */
1704    if (o == GOF(LR)        && is8) goto exactly1;
1705    if (o == GOF(CTR)       && is8) goto exactly1;
1706    if (o == GOF(CIA)       && is8) goto none;
1707    if (o == GOF(IP_AT_SYSCALL) && is8) goto none;
1708    if (o == GOF(TISTART)   && is8) goto none;
1709    if (o == GOF(TILEN)     && is8) goto none;
1710    if (o == GOF(REDIR_SP)  && is8) goto none;
1711 
1712    if (sz == 1) {
1713       if (o == GOF(XER_SO))  goto none;
1714       if (o == GOF(XER_OV))  goto none;
1715       if (o == GOF(XER_CA))  goto none;
1716       if (o == GOF(XER_BC))  goto none;
1717       if (o == GOF(CR0_321)) goto none;
1718       if (o == GOF(CR0_0))   goto none;
1719       if (o == GOF(CR1_321)) goto none;
1720       if (o == GOF(CR1_0))   goto none;
1721       if (o == GOF(CR2_321)) goto none;
1722       if (o == GOF(CR2_0))   goto none;
1723       if (o == GOF(CR3_321)) goto none;
1724       if (o == GOF(CR3_0))   goto none;
1725       if (o == GOF(CR4_321)) goto none;
1726       if (o == GOF(CR4_0))   goto none;
1727       if (o == GOF(CR5_321)) goto none;
1728       if (o == GOF(CR5_0))   goto none;
1729       if (o == GOF(CR6_321)) goto none;
1730       if (o == GOF(CR6_0))   goto none;
1731       if (o == GOF(CR7_321)) goto none;
1732       if (o == GOF(CR7_0))   goto none;
1733    }
1734 
1735    /* Exact accesses to FP registers */
1736    if (o == GOF(FPR0)  && is8) goto none;
1737    if (o == GOF(FPR1)  && is8) goto none;
1738    if (o == GOF(FPR2)  && is8) goto none;
1739    if (o == GOF(FPR3)  && is8) goto none;
1740    if (o == GOF(FPR4)  && is8) goto none;
1741    if (o == GOF(FPR5)  && is8) goto none;
1742    if (o == GOF(FPR6)  && is8) goto none;
1743    if (o == GOF(FPR7)  && is8) goto none;
1744    if (o == GOF(FPR8)  && is8) goto none;
1745    if (o == GOF(FPR9)  && is8) goto none;
1746    if (o == GOF(FPR10) && is8) goto none;
1747    if (o == GOF(FPR11) && is8) goto none;
1748    if (o == GOF(FPR12) && is8) goto none;
1749    if (o == GOF(FPR13) && is8) goto none;
1750    if (o == GOF(FPR14) && is8) goto none;
1751    if (o == GOF(FPR15) && is8) goto none;
1752    if (o == GOF(FPR16) && is8) goto none;
1753    if (o == GOF(FPR17) && is8) goto none;
1754    if (o == GOF(FPR18) && is8) goto none;
1755    if (o == GOF(FPR19) && is8) goto none;
1756    if (o == GOF(FPR20) && is8) goto none;
1757    if (o == GOF(FPR21) && is8) goto none;
1758    if (o == GOF(FPR22) && is8) goto none;
1759    if (o == GOF(FPR23) && is8) goto none;
1760    if (o == GOF(FPR24) && is8) goto none;
1761    if (o == GOF(FPR25) && is8) goto none;
1762    if (o == GOF(FPR26) && is8) goto none;
1763    if (o == GOF(FPR27) && is8) goto none;
1764    if (o == GOF(FPR28) && is8) goto none;
1765    if (o == GOF(FPR29) && is8) goto none;
1766    if (o == GOF(FPR30) && is8) goto none;
1767    if (o == GOF(FPR31) && is8) goto none;
1768 
1769    /* FP admin related */
1770    if (o == GOF(FPROUND) && is4) goto none;
1771    if (o == GOF(EMWARN)  && is4) goto none;
1772 
1773    /* Altivec registers */
1774    if (o == GOF(VR0)  && sz == 16) goto none;
1775    if (o == GOF(VR1)  && sz == 16) goto none;
1776    if (o == GOF(VR2)  && sz == 16) goto none;
1777    if (o == GOF(VR3)  && sz == 16) goto none;
1778    if (o == GOF(VR4)  && sz == 16) goto none;
1779    if (o == GOF(VR5)  && sz == 16) goto none;
1780    if (o == GOF(VR6)  && sz == 16) goto none;
1781    if (o == GOF(VR7)  && sz == 16) goto none;
1782    if (o == GOF(VR8)  && sz == 16) goto none;
1783    if (o == GOF(VR9)  && sz == 16) goto none;
1784    if (o == GOF(VR10) && sz == 16) goto none;
1785    if (o == GOF(VR11) && sz == 16) goto none;
1786    if (o == GOF(VR12) && sz == 16) goto none;
1787    if (o == GOF(VR13) && sz == 16) goto none;
1788    if (o == GOF(VR14) && sz == 16) goto none;
1789    if (o == GOF(VR15) && sz == 16) goto none;
1790    if (o == GOF(VR16) && sz == 16) goto none;
1791    if (o == GOF(VR17) && sz == 16) goto none;
1792    if (o == GOF(VR18) && sz == 16) goto none;
1793    if (o == GOF(VR19) && sz == 16) goto none;
1794    if (o == GOF(VR20) && sz == 16) goto none;
1795    if (o == GOF(VR21) && sz == 16) goto none;
1796    if (o == GOF(VR22) && sz == 16) goto none;
1797    if (o == GOF(VR23) && sz == 16) goto none;
1798    if (o == GOF(VR24) && sz == 16) goto none;
1799    if (o == GOF(VR25) && sz == 16) goto none;
1800    if (o == GOF(VR26) && sz == 16) goto none;
1801    if (o == GOF(VR27) && sz == 16) goto none;
1802    if (o == GOF(VR28) && sz == 16) goto none;
1803    if (o == GOF(VR29) && sz == 16) goto none;
1804    if (o == GOF(VR30) && sz == 16) goto none;
1805    if (o == GOF(VR31) && sz == 16) goto none;
1806 
1807    /* Altivec admin related */
1808    if (o == GOF(VRSAVE) && is4) goto none;
1809 
1810    VG_(printf)("get_IntRegInfo(ppc64):failing on (%d,%d)\n", o, sz);
1811    tl_assert(0);
1812 #  undef GOF
1813 
1814    /* -------------------- arm -------------------- */
1815 
1816 #  elif defined(VGA_arm)
1817 
1818 #  define GOF(_fieldname) \
1819       (offsetof(VexGuestARMState,guest_##_fieldname))
1820 
1821    Int  o    = offset;
1822    Int  sz   = szB;
1823    Bool is4  = sz == 4;
1824    Bool is8  = sz == 8;
1825 
1826    tl_assert(sz > 0);
1827    tl_assert(host_is_little_endian()); /* ARM guests are little-endian */
1828 
1829    /* Set default state to "does not intersect any int register". */
1830    VG_(memset)( iii, 0, sizeof(*iii) );
1831 
1832    VG_(printf)("get_IntRegInfo(arm):failing on (%d,%d)\n", o, sz);
1833    tl_assert(0);
1834 
1835 
1836 #  else
1837 #    error "FIXME: not implemented for this architecture"
1838 #  endif
1839 
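   /* Outcome encoding, as read back by callers such as
      is_integer_guest_reg below: n_offsets == -1 means the slice
      exactly covers one integer register; n_offsets == 0 means it
      intersects no integer register; n_offsets == 1 means it lies
      inside a register, with offsets[0] holding that register's
      guest-state offset. */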
1840   exactly1:
1841    iii->n_offsets = -1;
1842    return;
1843   none:
1844    iii->n_offsets = 0;
1845    return;
1846   contains_o:
1847    tl_assert(o >= 0 && 0 == (o % sizeof(UWord)));
1848    iii->n_offsets = 1;
1849    iii->offsets[0] = o;
1850    return;
1851 }
1852 
1853 
1854 /* Does 'arr' describe an indexed guest state section containing host
1855    words that we want to shadow? */
1856 
1857 static Bool is_integer_guest_reg_array ( IRRegArray* arr )
1858 {
1859    /* --------------------- x86 --------------------- */
1860 #  if defined(VGA_x86)
1861    /* The x87 tag array. */
1862    if (arr->base == offsetof(VexGuestX86State,guest_FPTAG[0])
1863        && arr->elemTy == Ity_I8 && arr->nElems == 8)
1864       return False;
1865    /* The x87 register array. */
1866    if (arr->base == offsetof(VexGuestX86State,guest_FPREG[0])
1867        && arr->elemTy == Ity_F64 && arr->nElems == 8)
1868       return False;
1869 
1870    VG_(printf)("is_integer_guest_reg_array(x86): unhandled: ");
1871    ppIRRegArray(arr);
1872    VG_(printf)("\n");
1873    tl_assert(0);
1874 
1875    /* -------------------- amd64 -------------------- */
1876 #  elif defined(VGA_amd64)
1877    /* The x87 tag array. */
1878    if (arr->base == offsetof(VexGuestAMD64State,guest_FPTAG[0])
1879        && arr->elemTy == Ity_I8 && arr->nElems == 8)
1880       return False;
1881    /* The x87 register array. */
1882    if (arr->base == offsetof(VexGuestAMD64State,guest_FPREG[0])
1883        && arr->elemTy == Ity_F64 && arr->nElems == 8)
1884       return False;
1885 
1886    VG_(printf)("is_integer_guest_reg_array(amd64): unhandled: ");
1887    ppIRRegArray(arr);
1888    VG_(printf)("\n");
1889    tl_assert(0);
1890 
1891    /* -------------------- ppc32 -------------------- */
1892 #  elif defined(VGA_ppc32)
1893    /* The redir stack. */
1894    if (arr->base == offsetof(VexGuestPPC32State,guest_REDIR_STACK[0])
1895        && arr->elemTy == Ity_I32
1896        && arr->nElems == VEX_GUEST_PPC32_REDIR_STACK_SIZE)
1897       return True;
1898 
1899    VG_(printf)("is_integer_guest_reg_array(ppc32): unhandled: ");
1900    ppIRRegArray(arr);
1901    VG_(printf)("\n");
1902    tl_assert(0);
1903 
1904    /* -------------------- ppc64 -------------------- */
1905 #  elif defined(VGA_ppc64)
1906    /* The redir stack. */
1907    if (arr->base == offsetof(VexGuestPPC64State,guest_REDIR_STACK[0])
1908        && arr->elemTy == Ity_I64
1909        && arr->nElems == VEX_GUEST_PPC64_REDIR_STACK_SIZE)
1910       return True;
1911 
1912    VG_(printf)("is_integer_guest_reg_array(ppc64): unhandled: ");
1913    ppIRRegArray(arr);
1914    VG_(printf)("\n");
1915    tl_assert(0);
1916 
1917    /* -------------------- arm -------------------- */
1918 #  elif defined(VGA_arm)
1919    /* There are no rotating register sections on ARM. */
1920    VG_(printf)("is_integer_guest_reg_array(arm): unhandled: ");
1921    ppIRRegArray(arr);
1922    VG_(printf)("\n");
1923    tl_assert(0);
1924 
1925 #  else
1926 #    error "FIXME: not implemented for this architecture"
1927 #  endif
1928 }
1929 
1930 
1931 // END move this uglyness to pc_machine.c
1932 
1933 /* Returns True iff the given slice exactly matches an int reg.  Merely
1934    a convenience wrapper around get_IntRegInfo. */
1935 static Bool is_integer_guest_reg ( Int offset, Int szB )
1936 {
1937    IntRegInfo iii;
1938    get_IntRegInfo( &iii, offset, szB );
1939    tl_assert(iii.n_offsets >= -1 && iii.n_offsets <= N_INTREGINFO_OFFSETS);
1940    return iii.n_offsets == -1;
1941 }
1942 
1943 /* These assume guest and host have the same endianness and word
1944    size; the asserts below check only the word size. */
1945 static UWord get_guest_intreg ( ThreadId tid, Int shadowNo,
1946                                 PtrdiffT offset, SizeT size )
1947 {
1948    UChar tmp[ 2 + sizeof(UWord) ];
1949    tl_assert(size == sizeof(UWord));
1950    tl_assert(0 == (offset % sizeof(UWord)));
1951    VG_(memset)(tmp, 0, sizeof(tmp));
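   /* The 0x31/0x27 bytes below are sentinels placed either side of
      the word we read into; re-checking them afterwards catches any
      out-of-range write by VG_(get_shadow_regs_area). */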
1952    tmp[0] = 0x31;
1953    tmp[ sizeof(tmp)-1 ] = 0x27;
1954    VG_(get_shadow_regs_area)(tid, &tmp[1], shadowNo, offset, size);
1955    tl_assert(tmp[0] == 0x31);
1956    tl_assert(tmp[ sizeof(tmp)-1 ] == 0x27);
1957    return * ((UWord*) &tmp[1] ); /* MISALIGNED LOAD */
1958 }
1959 static void put_guest_intreg ( ThreadId tid, Int shadowNo,
1960                                PtrdiffT offset, SizeT size, UWord w )
1961 {
1962    tl_assert(size == sizeof(UWord));
1963    tl_assert(0 == (offset % sizeof(UWord)));
1964    VG_(set_shadow_regs_area)(tid, shadowNo, offset, size,
1965                              (const UChar*)&w);
1966 }
1967 
1968 /* Initialise the integer shadow registers to UNKNOWN.  This is a bit
1969    of a nasty kludge, but it does mean we don't need to know which
1970    registers we really need to initialise -- simply assume that all
1971    integer registers will be naturally aligned w.r.t. the start of the
1972    guest state, and fill in all possible entries. */
1973 static void init_shadow_registers ( ThreadId tid )
1974 {
1975    Int i, wordSzB = sizeof(UWord);
1976    for (i = 0; i < PC_SIZEOF_GUEST_STATE-wordSzB; i += wordSzB) {
1977       put_guest_intreg( tid, 1, i, wordSzB, (UWord)UNKNOWN );
1978    }
1979 }
1980 
1981 static void post_reg_write_nonptr ( ThreadId tid, PtrdiffT offset, SizeT size )
1982 {
1983    // syscall_return: Default is non-pointer.  If it really is a pointer
1984    // (eg. for mmap()), SK_(post_syscall) sets it again afterwards.
1985    //
1986    // clientreq_return: All the global client requests return non-pointers
1987    // (except possibly CLIENT_CALL[0123], but they're handled by
1988    // post_reg_write_clientcall, not here).
1989    //
1990    if (is_integer_guest_reg( (Int)offset, (Int)size )) {
1991       put_guest_intreg( tid, 1, offset, size, (UWord)NONPTR );
1992    }
1993    else
1994    if (size == 1 || size == 2) {
1995       /* can't possibly be an integer guest reg.  Ignore. */
1996    }
1997    else {
1998       // DDD: on Darwin, this assertion fails because we currently do a
1999       // 'post_reg_write' on the 'guest_CC_DEP1' pseudo-register.
2000       // JRS 2009July13: we should change is_integer_guest_reg()
2001       // to accept guest_CC_DEP* and guest_CC_NDEP
2002       // as legitimate pointer-holding registers
2003       tl_assert(0);
2004    }
2005    //   VG_(set_thread_shadow_archreg)( tid, reg, (UInt)NONPTR );
2006 }
2007 
2008 static void post_reg_write_nonptr_or_unknown ( ThreadId tid,
2009                                                PtrdiffT offset, SizeT size )
2010 {
2011    // deliver_signal: called from two places; one sets the reg to zero, the
2012    // other sets the stack pointer.
2013    //
2014    if (is_integer_guest_reg( (Int)offset, (Int)size )) {
2015       put_guest_intreg(
2016          tid, 1/*shadowno*/, offset, size,
2017          (UWord)nonptr_or_unknown(
2018                    get_guest_intreg( tid, 0/*shadowno*/,
2019                                      offset, size )));
2020    } else {
2021       tl_assert(0);
2022    }
2023 }
2024 
2025 void h_post_reg_write_demux ( CorePart part, ThreadId tid,
2026                               PtrdiffT guest_state_offset, SizeT size)
2027 {
2028    if (0)
2029       VG_(printf)("post_reg_write_demux: tid %d part %d off %ld size %ld\n",
2030                   (Int)tid, (Int)part,
2031                   guest_state_offset, size);
2032    switch (part) {
2033       case Vg_CoreStartup:
2034          /* This is a bit of a kludge since for any Vg_CoreStartup
2035             event we overwrite the entire shadow register set.  But
2036             that's ok - we're only called once, with
2037             part==Vg_CoreStartup, and in that case the supplied
2038             offset & size cover the entire guest state anyway. */
2039          init_shadow_registers(tid);
2040          break;
2041       case Vg_CoreSysCall:
2042          if (0) VG_(printf)("ZZZZZZZ p_r_w    -> NONPTR\n");
2043          post_reg_write_nonptr( tid, guest_state_offset, size );
2044          break;
2045       case Vg_CoreClientReq:
2046          post_reg_write_nonptr( tid, guest_state_offset, size );
2047          break;
2048       case Vg_CoreSignal:
2049          post_reg_write_nonptr_or_unknown( tid, guest_state_offset, size );
2050          break;
2051       default:
2052          tl_assert(0);
2053    }
2054 }
2055 
2056 void h_post_reg_write_clientcall(ThreadId tid, PtrdiffT guest_state_offset,
2057                                  SizeT size, Addr f )
2058 {
2059    UWord p;
2060 
2061    // Having to do this is a bit nasty...
2062    if (f == (Addr)h_replace_malloc
2063        || f == (Addr)h_replace___builtin_new
2064        || f == (Addr)h_replace___builtin_vec_new
2065        || f == (Addr)h_replace_calloc
2066        || f == (Addr)h_replace_memalign
2067        || f == (Addr)h_replace_realloc)
2068    {
2069       // We remembered the last added segment;  make sure it's the right one.
2070       /* What's going on: at this point, the scheduler has just called
2071          'f' -- one of our malloc replacement functions -- and it has
2072          returned.  The return value has been written to the guest
2073          state of thread 'tid', offset 'guest_state_offset' length
2074          'size'.  We need to look at that return value and set the
2075          shadow return value accordingly.  The shadow return value
2076          required is handed to us "under the counter" through the
2077          global variable 'last_seg_added'.  This is all very ugly, not
2078          to mention, non-thread-safe should V ever become
2079          multithreaded. */
2080       /* assert that the location holding the return value is a legit int reg */
2081       tl_assert(is_integer_guest_reg(guest_state_offset, size));
2082       /* Now we need to look at the returned value, to see whether the
2083          malloc succeeded or not. */
2084       p = get_guest_intreg(tid, 0/*non-shadow*/, guest_state_offset, size);
2085       if ((UWord)NULL == p) {
2086          // if alloc failed, eg. realloc on bogus pointer
2087          put_guest_intreg(tid, 1/*first-shadow*/,
2088                           guest_state_offset, size, (UWord)NONPTR );
2089       } else {
2090          // alloc didn't fail.  Check we have the correct segment.
2091          tl_assert(p == last_seg_added->addr);
2092          put_guest_intreg(tid, 1/*first-shadow*/,
2093                           guest_state_offset, size, (UWord)last_seg_added );
2094       }
2095    }
2096    else if (f == (Addr)h_replace_free
2097             || f == (Addr)h_replace___builtin_delete
2098             || f == (Addr)h_replace___builtin_vec_delete
2099          // || f == (Addr)VG_(cli_block_size)
2100             || f == (Addr)VG_(message))
2101    {
2102       // Probably best to set the (non-existent!) return value to
2103       // non-pointer.
2104       tl_assert(is_integer_guest_reg(guest_state_offset, size));
2105       put_guest_intreg(tid, 1/*first-shadow*/,
2106                        guest_state_offset, size, (UWord)NONPTR );
2107    }
2108    else {
2109       // Anything else, probably best to set return value to non-pointer.
2110       //VG_(set_thread_shadow_archreg)(tid, reg, (UInt)UNKNOWN);
2111       Char fbuf[100];
2112       VG_(printf)("f = %#lx\n", f);
2113       VG_(get_fnname)(f, fbuf, 100);
2114       VG_(printf)("name = %s\n", fbuf);
2115       VG_(tool_panic)("argh: clientcall");
2116    }
2117 }
2118 
2119 
2120 //zz /*--------------------------------------------------------------------*/
2121 //zz /*--- Sanity checking                                              ---*/
2122 //zz /*--------------------------------------------------------------------*/
2123 //zz
2124 //zz /* Check that nobody has spuriously claimed that the first or last 16
2125 //zz    pages (64 KB) of address space have become accessible.  Failure of
2126 //zz    the following does not per se indicate an internal consistency
2127 //zz    problem, but it is so likely to that we really want to know
2128 //zz    about it if so. */
2129 //zz Bool pc_replace_cheap_sanity_check ( void )
2130 //zz {
2131 //zz    if (IS_DISTINGUISHED_SM(primary_map[0])
2132 //zz        /* kludge: kernel drops a page up at top of address range for
2133 //zz           magic "optimized syscalls", so we can no longer check the
2134 //zz           highest page */
2135 //zz        /* && IS_DISTINGUISHED_SM(primary_map[65535]) */
2136 //zz       )
2137 //zz       return True;
2138 //zz    else
2139 //zz       return False;
2140 //zz }
2141 //zz
2142 //zz Bool SK_(expensive_sanity_check) ( void )
2143 //zz {
2144 //zz    Int i;
2145 //zz
2146 //zz    /* Make sure nobody changed the distinguished secondary. */
2147 //zz    for (i = 0; i < SEC_MAP_WORDS; i++)
2148 //zz       if (distinguished_secondary_map.vseg[i] != UNKNOWN)
2149 //zz          return False;
2150 //zz
2151 //zz    return True;
2152 //zz }
2153 
2154 
2155 /*--------------------------------------------------------------------*/
2156 /*--- System calls                                                 ---*/
2157 /*--------------------------------------------------------------------*/
2158 
2159 void h_pre_syscall ( ThreadId tid, UInt sysno,
2160                      UWord* args, UInt nArgs )
2161 {
2162    /* we don't do anything at the pre-syscall point */
2163 }
2164 
2165 /* The post-syscall table is a table of pairs (number, flag).
2166 
2167    'flag' is only ever zero or one.  If it is zero, it indicates that
2168    default handling for that syscall is required -- namely that the
2169    syscall is deemed to return NONPTR.  This is the case for the vast
2170    majority of syscalls.  If it is one then some special
2171    syscall-specific handling is required.  No further details of it
2172    are stored in the table.
2173 
2174    On Linux and Darwin, 'number' is a __NR_xxx constant.
2175 
2176    On AIX5, 'number' is an Int*, which points to the Int variable
2177    holding the currently assigned number for this syscall.
2178 
2179    When querying the table, we compare the supplied syscall number
2180    with the 'number' field (directly on Linux and Darwin, after
2181    dereferencing on AIX5), to find the relevant entry.  This requires a
2182    linear search of the table.  To stop the costs getting too high, the
2183    table is incrementally rearranged after each search, to move commonly
2184    requested items a bit closer to the front.
2185 
2186    The table is built once, the first time it is used.  After that we
2187    merely query it (and reorder the entries as a result). */
2188 
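/* A sketch of the lookup discipline just described; this is
   illustrative only (the real search-and-reorder code is in
   h_post_syscall below, and 'lookup_mtf' is a hypothetical name):

      static UWordPair* lookup_mtf ( XArray* tab, UWord key )
      {
         Word i, n = VG_(sizeXA)( tab );
         for (i = 0; i < n; i++) {
            UWordPair* q = VG_(indexXA)( tab, i );
            if (q->uw1 == key) {
               if (i > 0) {
                  // move-to-front, one step at a time: swap with the
                  // preceding entry, so frequently requested entries
                  // migrate gradually towards index 0
                  UWordPair* p   = VG_(indexXA)( tab, i-1 );
                  UWordPair  tmp = *p;  *p = *q;  *q = tmp;
                  q = p;
               }
               return q;
            }
         }
         return NULL;  // caller treats this as "unhandled syscall"
      }
*/
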
2189 static XArray* /* of UWordPair */ post_syscall_table = NULL;
2190 
2191 static void setup_post_syscall_table ( void )
2192 {
2193    tl_assert(!post_syscall_table);
2194    post_syscall_table = VG_(newXA)( VG_(malloc), "pc.h_main.spst.1",
2195                                     VG_(free), sizeof(UWordPair) );
2196    tl_assert(post_syscall_table);
2197 
2198    /* --------------- LINUX --------------- */
2199 
2200 #  if defined(VGO_linux)
2201 
2202 #     define ADD(_flag, _syscallname) \
2203          do { UWordPair p; p.uw1 = (_syscallname); p.uw2 = (_flag); \
2204               VG_(addToXA)( post_syscall_table, &p ); \
2205          } while (0)
2206 
2207       /* These ones definitely don't return pointers.  They're not
2208          particularly grammatical, either. */
2209 
2210 #     if defined(__NR__llseek)
2211       ADD(0, __NR__llseek);
2212 #     endif
2213       ADD(0, __NR__sysctl);
2214 #     if defined(__NR__newselect)
2215       ADD(0, __NR__newselect);
2216 #     endif
2217 #     if defined(__NR_accept)
2218       ADD(0, __NR_accept);
2219 #     endif
2220       ADD(0, __NR_access);
2221       ADD(0, __NR_alarm);
2222 #     if defined(__NR_bind)
2223       ADD(0, __NR_bind);
2224 #     endif
2225 #     if defined(__NR_chdir)
2226       ADD(0, __NR_chdir);
2227 #     endif
2228       ADD(0, __NR_chmod);
2229       ADD(0, __NR_chown);
2230 #     if defined(__NR_chown32)
2231       ADD(0, __NR_chown32);
2232 #     endif
2233       ADD(0, __NR_clock_getres);
2234       ADD(0, __NR_clock_gettime);
2235       ADD(0, __NR_clone);
2236       ADD(0, __NR_close);
2237 #     if defined(__NR_connect)
2238       ADD(0, __NR_connect);
2239 #     endif
2240       ADD(0, __NR_creat);
2241       ADD(0, __NR_dup);
2242       ADD(0, __NR_dup2);
2243       ADD(0, __NR_epoll_create);
2244 #     if defined(__NR_epoll_create1)
2245       ADD(0, __NR_epoll_create1);
2246 #     endif
2247       ADD(0, __NR_epoll_ctl);
2248 #     if defined(__NR_epoll_pwait)
2249       ADD(0, __NR_epoll_pwait);
2250 #     endif
2251       ADD(0, __NR_epoll_wait);
2252       ADD(0, __NR_execve); /* presumably we see this because the call failed? */
2253       ADD(0, __NR_exit); /* hmm, why are we still alive? */
2254       ADD(0, __NR_exit_group);
2255       ADD(0, __NR_fadvise64);
2256       ADD(0, __NR_fallocate);
2257       ADD(0, __NR_fchmod);
2258       ADD(0, __NR_fchown);
2259 #     if defined(__NR_fchown32)
2260       ADD(0, __NR_fchown32);
2261 #     endif
2262       ADD(0, __NR_fcntl);
2263 #     if defined(__NR_fcntl64)
2264       ADD(0, __NR_fcntl64);
2265 #     endif
2266       ADD(0, __NR_fdatasync);
2267       ADD(0, __NR_flock);
2268       ADD(0, __NR_fstat);
2269 #     if defined(__NR_fstat64)
2270       ADD(0, __NR_fstat64);
2271 #     endif
2272       ADD(0, __NR_fstatfs);
2273 #     if defined(__NR_fstatfs64)
2274       ADD(0, __NR_fstatfs64);
2275 #     endif
2276       ADD(0, __NR_fsync);
2277       ADD(0, __NR_ftruncate);
2278 #     if defined(__NR_ftruncate64)
2279       ADD(0, __NR_ftruncate64);
2280 #     endif
2281       ADD(0, __NR_futex);
2282       ADD(0, __NR_getcwd);
2283       ADD(0, __NR_getdents); // something to do with teeth
2284       ADD(0, __NR_getdents64);
2285       ADD(0, __NR_getegid);
2286 #     if defined(__NR_getegid32)
2287       ADD(0, __NR_getegid32);
2288 #     endif
2289       ADD(0, __NR_geteuid);
2290 #     if defined(__NR_geteuid32)
2291       ADD(0, __NR_geteuid32);
2292 #     endif
2293       ADD(0, __NR_getgid);
2294 #     if defined(__NR_getgid32)
2295       ADD(0, __NR_getgid32);
2296 #     endif
2297       ADD(0, __NR_getgroups);
2298 #     if defined(__NR_getgroups32)
2299       ADD(0, __NR_getgroups32);
2300 #     endif
2301       ADD(0, __NR_getitimer);
2302 #     if defined(__NR_getpeername)
2303       ADD(0, __NR_getpeername);
2304 #     endif
2305       ADD(0, __NR_getpid);
2306       ADD(0, __NR_getpgrp);
2307       ADD(0, __NR_getppid);
2308       ADD(0, __NR_getpriority);
2309       ADD(0, __NR_getresgid);
2310 #     if defined(__NR_getresgid32)
2311       ADD(0, __NR_getresgid32);
2312 #     endif
2313       ADD(0, __NR_getresuid);
2314 #     if defined(__NR_getresuid32)
2315       ADD(0, __NR_getresuid32);
2316 #     endif
2317       ADD(0, __NR_getrlimit);
2318       ADD(0, __NR_getrusage);
2319       ADD(0, __NR_getsid);
2320 #     if defined(__NR_getsockname)
2321       ADD(0, __NR_getsockname);
2322 #     endif
2323 #     if defined(__NR_getsockopt)
2324       ADD(0, __NR_getsockopt);
2325 #     endif
2326       ADD(0, __NR_gettid);
2327       ADD(0, __NR_gettimeofday);
2328       ADD(0, __NR_getuid);
2329 #     if defined(__NR_getuid32)
2330       ADD(0, __NR_getuid32);
2331 #     endif
2332       ADD(0, __NR_getxattr);
2333 #     if defined(__NR_ioperm)
2334       ADD(0, __NR_ioperm);
2335 #     endif
2336       ADD(0, __NR_inotify_add_watch);
2337       ADD(0, __NR_inotify_init);
2338 #     if defined(__NR_inotify_init1)
2339       ADD(0, __NR_inotify_init1);
2340 #     endif
2341       ADD(0, __NR_inotify_rm_watch);
2342       ADD(0, __NR_ioctl); // ioctl -- assuming no pointers returned
2343       ADD(0, __NR_ioprio_get);
2344       ADD(0, __NR_kill);
2345       ADD(0, __NR_lgetxattr);
2346       ADD(0, __NR_link);
2347 #     if defined(__NR_listen)
2348       ADD(0, __NR_listen);
2349 #     endif
2350       ADD(0, __NR_lseek);
2351       ADD(0, __NR_lstat);
2352 #     if defined(__NR_lstat64)
2353       ADD(0, __NR_lstat64);
2354 #     endif
2355       ADD(0, __NR_madvise);
2356       ADD(0, __NR_mkdir);
2357       ADD(0, __NR_mlock);
2358       ADD(0, __NR_mlockall);
2359       ADD(0, __NR_mprotect);
2360 #     if defined(__NR_mq_open)
2361       ADD(0, __NR_mq_open);
2362       ADD(0, __NR_mq_unlink);
2363       ADD(0, __NR_mq_timedsend);
2364       ADD(0, __NR_mq_timedreceive);
2365       ADD(0, __NR_mq_notify);
2366       ADD(0, __NR_mq_getsetattr);
2367 #     endif
2368       ADD(0, __NR_munmap); // die_mem_munmap already called, segment removed
2369       ADD(0, __NR_nanosleep);
2370       ADD(0, __NR_open);
2371       ADD(0, __NR_personality);
2372       ADD(0, __NR_pipe);
2373 #     if defined(__NR_pipe2)
2374       ADD(0, __NR_pipe2);
2375 #     endif
2376       ADD(0, __NR_poll);
2377       ADD(0, __NR_prctl);
2378       ADD(0, __NR_pread64);
2379       ADD(0, __NR_pwrite64);
2380       ADD(0, __NR_read);
2381       ADD(0, __NR_readlink);
2382       ADD(0, __NR_readv);
2383 #     if defined(__NR_recvfrom)
2384       ADD(0, __NR_recvfrom);
2385 #     endif
2386 #     if defined(__NR_recvmsg)
2387       ADD(0, __NR_recvmsg);
2388 #     endif
2389       ADD(0, __NR_rename);
2390       ADD(0, __NR_rmdir);
2391       ADD(0, __NR_rt_sigaction);
2392       ADD(0, __NR_rt_sigprocmask);
2393       ADD(0, __NR_rt_sigreturn); /* not sure if we should see this or not */
2394       ADD(0, __NR_rt_sigsuspend);
2395       ADD(0, __NR_rt_sigtimedwait);
2396       ADD(0, __NR_sched_get_priority_max);
2397       ADD(0, __NR_sched_get_priority_min);
2398       ADD(0, __NR_sched_getaffinity);
2399       ADD(0, __NR_sched_getparam);
2400       ADD(0, __NR_sched_getscheduler);
2401       ADD(0, __NR_sched_setaffinity);
2402       ADD(0, __NR_sched_setscheduler);
2403       ADD(0, __NR_sched_yield);
2404       ADD(0, __NR_select);
2405 #     if defined(__NR_semctl)
2406       ADD(0, __NR_semctl);
2407 #     endif
2408 #     if defined(__NR_semget)
2409       ADD(0, __NR_semget);
2410 #     endif
2411 #     if defined(__NR_semop)
2412       ADD(0, __NR_semop);
2413 #     endif
2414       ADD(0, __NR_sendfile);
2415 #     if defined(__NR_sendto)
2416       ADD(0, __NR_sendto);
2417 #     endif
2418 #     if defined(__NR_sendmsg)
2419       ADD(0, __NR_sendmsg);
2420 #     endif
2421       ADD(0, __NR_set_robust_list);
2422 #     if defined(__NR_set_thread_area)
2423       ADD(0, __NR_set_thread_area);
2424 #     endif
2425       ADD(0, __NR_set_tid_address);
2426       ADD(0, __NR_setgid);
2427       ADD(0, __NR_setfsgid);
2428       ADD(0, __NR_setfsuid);
2430       ADD(0, __NR_setgroups);
2431       ADD(0, __NR_setitimer);
2432       ADD(0, __NR_setpgid);
2433       ADD(0, __NR_setpriority);
2434       ADD(0, __NR_setregid);
2435       ADD(0, __NR_setresgid);
2436       ADD(0, __NR_setresuid);
2437       ADD(0, __NR_setreuid);
2438       ADD(0, __NR_setrlimit);
2439       ADD(0, __NR_setsid);
2440 #     if defined(__NR_setsockopt)
2441       ADD(0, __NR_setsockopt);
2442 #     endif
2443       ADD(0, __NR_setuid);
2444 #     if defined(__NR_shmctl)
2445       ADD(0, __NR_shmctl);
2446       ADD(0, __NR_shmdt);
2447 #     endif
2448 #     if defined(__NR_shutdown)
2449       ADD(0, __NR_shutdown);
2450 #     endif
2451       ADD(0, __NR_sigaltstack);
2452 #     if defined(__NR_socket)
2453       ADD(0, __NR_socket);
2454 #     endif
2455 #     if defined(__NR_socketcall)
2456       ADD(0, __NR_socketcall); /* the nasty x86-linux socket multiplexor */
2457 #     endif
2458 #     if defined(__NR_socketpair)
2459       ADD(0, __NR_socketpair);
2460 #     endif
2461 #     if defined(__NR_statfs64)
2462       ADD(0, __NR_statfs64);
2463 #     endif
2464 #     if defined(__NR_sigreturn)
2465       ADD(0, __NR_sigreturn); /* not sure if we should see this or not */
2466 #     endif
2467 #     if defined(__NR_stat64)
2468       ADD(0, __NR_stat64);
2469 #     endif
2470       ADD(0, __NR_stat);
2471       ADD(0, __NR_statfs);
2472       ADD(0, __NR_symlink);
2473       ADD(0, __NR_sysinfo);
2474       ADD(0, __NR_tgkill);
2475       ADD(0, __NR_time);
2476       ADD(0, __NR_times);
2477       ADD(0, __NR_truncate);
2478 #     if defined(__NR_truncate64)
2479       ADD(0, __NR_truncate64);
2480 #     endif
2481 #     if defined(__NR_ugetrlimit)
2482       ADD(0, __NR_ugetrlimit);
2483 #     endif
2484       ADD(0, __NR_umask);
2485       ADD(0, __NR_uname);
2486       ADD(0, __NR_unlink);
2487       ADD(0, __NR_utime);
2488       ADD(0, __NR_vfork);
2489 #     if defined(__NR_waitpid)
2490       ADD(0, __NR_waitpid);
2491 #     endif
2492       ADD(0, __NR_wait4);
2493       ADD(0, __NR_write);
2494       ADD(0, __NR_writev);
2495 
2496       /* Whereas the following need special treatment */
2497 #     if defined(__NR_arch_prctl)
2498       ADD(1, __NR_arch_prctl);
2499 #     endif
2500       ADD(1, __NR_brk);
2501       ADD(1, __NR_mmap);
2502 #     if defined(__NR_mmap2)
2503       ADD(1, __NR_mmap2);
2504 #     endif
2505 #     if defined(__NR_shmat)
2506       ADD(1, __NR_shmat);
2507 #     endif
2508 #     if defined(__NR_shmget)
2509       ADD(1, __NR_shmget);
2510 #     endif
2511 #     if defined(__NR_ipc) && defined(VKI_SHMAT)
2512       ADD(1, __NR_ipc); /* ppc{32,64}-linux horrors */
2513 #     endif
2514 
2515    /* --------------- AIX5 --------------- */
2516 
2517 #  elif defined(VGO_aix5)
2518 
2519 #     define ADD(_flag, _syscallname) \
2520          do { \
2521             UWordPair p; \
2522             if ((_syscallname) != __NR_AIX5_UNKNOWN) { \
2523                p.uw1 = (UWord)&(_syscallname); p.uw2 = (_flag); \
2524                VG_(addToXA)( post_syscall_table, &p ); \
2525             } \
2526          } while (0)
2527 
2528       /* Just a minimal set of handlers, enough to make
2529          a 32- and 64-bit hello-world program run. */
2530       ADD(1, __NR_AIX5___loadx); /* not sure what to do here */
2531       ADD(0, __NR_AIX5__exit);
2532       ADD(0, __NR_AIX5_access);
2533       ADD(0, __NR_AIX5_getgidx);
2534       ADD(0, __NR_AIX5_getuidx);
2535       ADD(0, __NR_AIX5_kfcntl);
2536       ADD(0, __NR_AIX5_kioctl);
2537       ADD(1, __NR_AIX5_kload); /* not sure what to do here */
2538       ADD(0, __NR_AIX5_kwrite);
2539 
2540    /* --------------- DARWIN ------------- */
2541 
2542 #  elif defined(VGO_darwin)
2543 
2544 #     define ADD(_flag, _syscallname) \
2545          do { UWordPair p; p.uw1 = (_syscallname); p.uw2 = (_flag); \
2546               VG_(addToXA)( post_syscall_table, &p ); \
2547          } while (0)
2548 
2549       // DDD: a desultory attempt thus far...
2550 
2551       // Unix/BSD syscalls.
2552 
2553       // Mach traps.
2554       ADD(0, __NR_host_self_trap);
2555       ADD(0, __NR_mach_msg_trap);
2556       ADD(0, __NR_mach_reply_port);
2557       ADD(0, __NR_task_self_trap);
2558 
2559       // Machine-dependent syscalls.
2560       ADD(0, __NR_thread_fast_set_cthread_self);
2561 
2562    /* ------------------------------------ */
2563 
2564 #  else
2565 #     error "Unsupported OS"
2566 #  endif
2567 
2568 #  undef ADD
2569 }
2570 
2571 
2572 void h_post_syscall ( ThreadId tid, UInt sysno,
2573                       UWord* args, UInt nArgs, SysRes res )
2574 {
2575    Word i, n;
2576    UWordPair* pair;
2577 
2578    if (!post_syscall_table)
2579       setup_post_syscall_table();
2580 
2581    /* search for 'sysno' in the post_syscall_table */
2582    n = VG_(sizeXA)( post_syscall_table );
2583    for (i = 0; i < n; i++) {
2584       pair = VG_(indexXA)( post_syscall_table, i );
2585 #     if defined(VGO_linux) || defined(VGO_darwin)
2586       if (pair->uw1 == (UWord)sysno)
2587          break;
2588 #     elif defined(VGO_aix5)
2589       if (*(Int*)(pair->uw1) == (Int)sysno)
2590          break;
2591 #     else
2592 #        error "Unsupported OS"
2593 #     endif
2594    }
2595 
2596    tl_assert(i >= 0 && i <= n);
2597 
2598    if (i == n) {
2599       VG_(printf)("sysno == %s", VG_SYSNUM_STRING_EXTRA(sysno));
2600       VG_(tool_panic)("unhandled syscall");
2601    }
2602 
2603    /* So we found the relevant entry.  Move it one step
2604       forward so as to speed future accesses to it. */
2605    if (i > 0) {
2606       UWordPair tmp, *p, *q;
2607       p = VG_(indexXA)( post_syscall_table, i-1 );
2608       q = VG_(indexXA)( post_syscall_table, i-0 );
2609       tmp = *p;
2610       *p = *q;
2611       *q = tmp;
2612       i--;
2613    }
2614 
2615    /* Deal with the common case */
2616    pair = VG_(indexXA)( post_syscall_table, i );
2617    if (pair->uw2 == 0)
2618       /* the common case */
2619       goto res_NONPTR_err_NONPTR;
2620 
2621    /* Special handling for all remaining cases */
2622    tl_assert(pair->uw2 == 1);
2623 
2624 #  if defined(__NR_arch_prctl)
2625    if (sysno == __NR_arch_prctl) {
2626       /* This is nasty.  On amd64-linux, arch_prctl may write a
2627          value to guest_FS_ZERO, and we need to shadow that value.
2628          Hence apply nonptr_or_unknown to it here, after the
2629          syscall completes. */
2630       post_reg_write_nonptr_or_unknown( tid, PC_OFF_FS_ZERO,
2631                                              PC_SZB_FS_ZERO );
2632       goto res_NONPTR_err_NONPTR;
2633    }
2634 #  endif
2635 
2636 #  if defined(__NR_brk)
2637    // With brk(), result (of kernel syscall, not glibc wrapper) is a heap
2638    // pointer.  Make the shadow UNKNOWN.
2639    if (sysno == __NR_brk)
2640       goto res_UNKNOWN_err_NONPTR;
2641 #  endif
2642 
2643    // With mmap, new_mem_mmap() has already been called and added the
2644    // segment (we did it there because we had the result address and size
2645    // handy).  So just set the return value shadow.
2646    if (sysno == __NR_mmap
2647 #      if defined(__NR_mmap2)
2648        || sysno == __NR_mmap2
2649 #      endif
2650 #      if defined(__NR_AIX5___loadx)
2651        || (sysno == __NR_AIX5___loadx && __NR_AIX5___loadx != __NR_AIX5_UNKNOWN)
2652 #      endif
2653 #      if defined(__NR_AIX5_kload)
2654        || (sysno == __NR_AIX5_kload && __NR_AIX5_kload != __NR_AIX5_UNKNOWN)
2655 #      endif
2656       ) {
2657       if (sr_isError(res)) {
2658          // mmap() had an error, return value is a small negative integer
2659          goto res_NONPTR_err_NONPTR;
2660       } else {
2661          goto res_UNKNOWN_err_NONPTR;
2662       }
2664    }
2665 
2666    // shmat uses the same scheme.  We will just have had a
2667    // notification via new_mem_mmap.  Just set the return value shadow.
2668 #  if defined(__NR_shmat)
2669    if (sysno == __NR_shmat) {
2670       if (sr_isError(res)) {
2671          goto res_NONPTR_err_NONPTR;
2672       } else {
2673          goto res_UNKNOWN_err_NONPTR;
2674       }
2675    }
2676 #  endif
2677 
2678 #  if defined(__NR_shmget)
2679    if (sysno == __NR_shmget)
2680       // FIXME: is this correct?
2681       goto res_UNKNOWN_err_NONPTR;
2682 #  endif
2683 
2684 #  if defined(__NR_ipc) && defined(VKI_SHMAT)
2685    /* perhaps this should be further conditionalised with
2686       && (defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)).
2687       Note, this just copies the behaviour of __NR_shmget above.
2688 
2689       JRS 2009 June 02: it seems that the return value from
2690       sys_ipc(VKI_SHMAT, ...) doesn't have much relationship to the
2691       result returned by the originating user-level shmat call.  It's
2692       different (and much lower) by a large but integral number of
2693       pages.  I don't have time to chase this right now.  Observed on
2694       ppc{32,64}-linux.  Result appears to be false errors from apps
2695       using shmat.  Confusion though -- shouldn't be related to the
2696       actual numeric values returned by the syscall, though, should
2697       it?  Confused.  Maybe some bad interaction with a
2698       nonpointer-or-unknown heuristic? */
2699    if (sysno == __NR_ipc) {
2700       if (args[0] == VKI_SHMAT) {
2701          goto res_UNKNOWN_err_NONPTR;
2702       } else {
2703          goto res_NONPTR_err_NONPTR;
2704       }
2705    }
2706 #  endif
2707 
2708    /* If we get here, it implies the corresponding entry in
2709       post_syscall_table has .uw2 == 1, which in turn implies there
2710       should be special-case code for it above. */
2711    tl_assert(0);
2712 
2713   res_NONPTR_err_NONPTR:
2714    VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)NONPTR, 0,
2715                                          /* error */  (UWord)NONPTR, 0 );
2716    return;
2717 
2718   res_UNKNOWN_err_NONPTR:
2719    VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)UNKNOWN, 0,
2720                                          /* error */  (UWord)NONPTR, 0 );
2721    return;
2722 }
2723 
2724 
2725 /*--------------------------------------------------------------------*/
2726 /*--- Functions called from generated code                         ---*/
2727 /*--------------------------------------------------------------------*/
2728 
2729 #if SC_SEGS
2730 static void checkSeg ( Seg vseg ) {
2731    tl_assert(vseg == UNKNOWN || vseg == NONPTR || vseg == BOTTOM
2732              || Seg__plausible(vseg) );
2733 }
2734 #endif
2735 
2736 // XXX: could be more sophisticated -- actually track the lowest/highest
2737 // valid address used by the program, and then return False for anything
2738 // below that (using a suitable safety margin).  Also, nothing above
2739 // 0xc0000000 is valid [unless you've changed that in your kernel]
2740 static inline Bool looks_like_a_pointer(Addr a)
2741 {
2742 #  if defined(VGA_x86) || defined(VGA_ppc32)
2743    tl_assert(sizeof(UWord) == 4);
2744    return (a > 0x800000UL && a < 0xFF000000UL);
2745 
2746 #  elif defined(VGA_amd64) || defined(VGA_ppc64)
2747    tl_assert(sizeof(UWord) == 8);
2748    return (a >= 16 * 0x10000UL && a < 0xFF00000000000000UL);
2749 
2750 #  elif defined(VGA_arm)
2751    /* Unfortunately arm-linux seems to load the exe very low, at
2752       0x8000, so we have to assume any value above that is a pointer,
2753       which is pretty dismal. */
2754    tl_assert(sizeof(UWord) == 4);
2755    return (a >= 0x00008000UL && a < 0xFF000000UL);
2756 
2757 #  else
2758 #    error "Unsupported architecture"
2759 #  endif
2760 }
2761 
2762 static inline VG_REGPARM(1)
2763 Seg* nonptr_or_unknown(UWord x)
2764 {
2765    Seg* res = looks_like_a_pointer(x) ? UNKNOWN : NONPTR;
2766    if (0) VG_(printf)("nonptr_or_unknown %s %#lx\n",
2767                       res==UNKNOWN ? "UUU" : "nnn", x);
2768    return res;
2769 }
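
/* Cross-reference: this is the standard way a word of unknown
   provenance gets (re)shadowed; see post_reg_write_nonptr_or_unknown
   above and the arch_prctl handling in h_post_syscall. */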
2770 
2771 //zz static __attribute__((regparm(1)))
2772 //zz void print_BB_entry(UInt bb)
2773 //zz {
2774 //zz    VG_(printf)("%u =\n", bb);
2775 //zz }
2776 
2777 //static ULong stats__tot_mem_refs  = 0;
2778 //static ULong stats__refs_in_a_seg = 0;
2779 //static ULong stats__refs_lost_seg = 0;
2780 
2781 typedef
2782    struct { ExeContext* ec; UWord count; }
2783    Lossage;
2784 
2785 static OSet* lossage = NULL;
2786 
2787 //static void inc_lossage ( ExeContext* ec )
2788 //{
2789 //   Lossage key, *res, *nyu;
2790 //   key.ec = ec;
2791 //   key.count = 0; /* frivolous */
2792 //   res = VG_(OSetGen_Lookup)(lossage, &key);
2793 //   if (res) {
2794 //      tl_assert(res->ec == ec);
2795 //      res->count++;
2796 //   } else {
2797 //      nyu = (Lossage*)VG_(OSetGen_AllocNode)(lossage, sizeof(Lossage));
2798 //      tl_assert(nyu);
2799 //      nyu->ec = ec;
2800 //      nyu->count = 1;
2801 //      VG_(OSetGen_Insert)( lossage, nyu );
2802 //   }
2803 //}
2804 
2805 static void init_lossage ( void )
2806 {
2807    lossage = VG_(OSetGen_Create)( /*keyOff*/ offsetof(Lossage,ec),
2808                                   /*fastCmp*/NULL,
2809                                   VG_(malloc), "pc.h_main.il.1",
2810                                   VG_(free) );
2811    tl_assert(lossage);
2812 }
2813 
2814 //static void show_lossage ( void )
2815 //{
2816 //   Lossage* elem;
2817 //   VG_(OSetGen_ResetIter)( lossage );
2818 //   while ( (elem = VG_(OSetGen_Next)(lossage)) ) {
2819 //      if (elem->count < 10) continue;
2820 //      //Char buf[100];
2821 //      //(void)VG_(describe_IP)(elem->ec, buf, sizeof(buf)-1);
2822 //      //buf[sizeof(buf)-1] = 0;
2823 //      //VG_(printf)("  %,8lu  %s\n", elem->count, buf);
2824 //      VG_(message)(Vg_UserMsg, "Lossage count %'lu at", elem->count);
2825 //      VG_(pp_ExeContext)(elem->ec);
2826 //   }
2827 //}
2828 
2829 // This function is called *a lot*; inlining it sped up Konqueror by 20%.
2830 static inline
2831 void check_load_or_store(Bool is_write, Addr m, UWord sz, Seg* mptr_vseg)
2832 {
2833 #if 0
2834    tl_assert(0);
2835    if (h_clo_lossage_check) {
2836       Seg* seg;
2837       stats__tot_mem_refs++;
2838       if (ISList__findI0( seglist, (Addr)m, &seg )) {
2839          /* m falls inside 'seg' (that is, we are making a memory
2840             reference inside 'seg').  Now, really mptr_vseg should be
2841             a tracked segment of some description.  Badness is when
2842             mptr_vseg is UNKNOWN, BOTTOM or NONPTR at this point,
2843             since that means we've lost the type of it somehow: it
2844            should say that m points into a real segment (preferably
2845             'seg'), but it doesn't. */
2846          if (Seg__status_is_SegHeap(seg)) {
2847             stats__refs_in_a_seg++;
2848             if (UNKNOWN == mptr_vseg
2849                 || BOTTOM == mptr_vseg || NONPTR == mptr_vseg) {
2850                ExeContext* ec;
2851                Char buf[100];
2852                static UWord xx = 0;
2853                stats__refs_lost_seg++;
2854                ec = VG_(record_ExeContext)( VG_(get_running_tid)(), 0 );
2855                inc_lossage(ec);
2856                if (0) {
2857                   VG_(message)(Vg_DebugMsg, "");
2858                   VG_(message)(Vg_DebugMsg,
2859                                "Lossage %s %#lx sz %lu inside block alloc'd",
2860                                is_write ? "wr" : "rd", m, (UWord)sz);
2861                   VG_(pp_ExeContext)(Seg__where(seg));
2862                }
2863                if (xx++ < 0) {
2864                   Addr ip = VG_(get_IP)( VG_(get_running_tid)() );
2865                   (void)VG_(describe_IP)( ip, buf, sizeof(buf)-1);
2866                   buf[sizeof(buf)-1] = 0;
2867                   VG_(printf)("lossage at %p %s\n", ec, buf );
2868                }
2869             }
2870          }
2871       }
2872    } /* clo_lossage_check */
2873 #endif
2874 
2875 #  if SC_SEGS
2876    checkSeg(mptr_vseg);
2877 #  endif
2878 
2879    if (UNKNOWN == mptr_vseg) {
2880       // do nothing
2881 
2882    } else if (BOTTOM == mptr_vseg) {
2883       // do nothing
2884 
2885    } else if (NONPTR == mptr_vseg) {
2886       h_record_heap_error( m, sz, mptr_vseg, is_write );
2887 
2888    } else {
2889       // check all segment ranges in the circle
2890       // if none match, warn about 1st seg
2891       // else,          check matching one isn't freed
2892       Bool is_ok = False;
2893       Seg* curr  = mptr_vseg;
2894       Addr mhi;
2895 
2896       // Accesses partly outside range are an error, unless it's an aligned
2897       // word-sized read, and --partial-loads-ok=yes.  This is to cope with
2898       // gcc's/glibc's habits of doing word-sized accesses that read past
2899       // the ends of arrays/strings.
2900       // JRS 2008-sept-11: couldn't this be moved off the critical path?
2901       if (!is_write && sz == sizeof(UWord)
2902           && h_clo_partial_loads_ok && SHMEM_IS_WORD_ALIGNED(m)) {
2903          mhi = m;
2904       } else {
2905          mhi = m+sz-1;
2906       }
2907 
2908       if (0) VG_(printf)("calling seg_ci %p %#lx %#lx\n", curr,m,mhi);
2909       is_ok = curr->addr <= m && mhi < curr->addr + curr->szB;
2910 
2911       // If it's an overrun/underrun of a freed block, don't give both
2912       // warnings, since the first one mentions that the block has been
2913       // freed.
2914       if ( ! is_ok || Seg__is_freed(curr) )
2915          h_record_heap_error( m, sz, mptr_vseg, is_write );
2916    }
2917 }
2918 
2919 // ------------------ Load handlers ------------------ //
2920 
2921 /* On 32 bit targets, we will use:
2922       check_load1 check_load2 check_load4_P
2923       check_load4  (for 32-bit FP reads)
2924       check_load8  (for 64-bit FP reads)
2925       check_load16 (for xmm/altivec reads)
2926    On 64 bit targets, we will use:
2927       check_load1 check_load2 check_load4 check_load8_P
2928       check_load8  (for 64-bit FP reads)
2929       check_load16 (for xmm/altivec reads)
2930 
2931    A "_P" handler reads a pointer from memory, and so returns a value
2932    to the generated code -- the pointer's shadow value.  That implies
2933    that check_load4_P is only to be called on a 32 bit host and
2934    check_load8_P is only to be called on a 64 bit host.  For all other
2935    cases no shadow value is returned; we merely check that the pointer
2936    (m) matches the block described by its shadow value (mptr_vseg).
2937 */
2938 
2939 // This handles 128 bit loads on both 32 bit and 64 bit targets.
2940 static VG_REGPARM(2)
check_load16(Addr m,Seg * mptr_vseg)2941 void check_load16(Addr m, Seg* mptr_vseg)
2942 {
2943 #  if SC_SEGS
2944    checkSeg(mptr_vseg);
2945 #  endif
2946    check_load_or_store(/*is_write*/False, m, 16, mptr_vseg);
2947 }
2948 
2949 // This handles 64 bit FP-or-otherwise-nonpointer loads on both
2950 // 32 bit and 64 bit targets.
2951 static VG_REGPARM(2)
check_load8(Addr m,Seg * mptr_vseg)2952 void check_load8(Addr m, Seg* mptr_vseg)
2953 {
2954 #  if SC_SEGS
2955    checkSeg(mptr_vseg);
2956 #  endif
2957    check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
2958 }
2959 
2960 // This handles 64 bit loads on 64 bit targets.  It must
2961 // not be called on 32 bit targets.
2962 // return m.vseg
2963 static VG_REGPARM(2)
check_load8_P(Addr m,Seg * mptr_vseg)2964 Seg* check_load8_P(Addr m, Seg* mptr_vseg)
2965 {
2966    Seg* vseg;
2967    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
2968 #  if SC_SEGS
2969    checkSeg(mptr_vseg);
2970 #  endif
2971    check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
2972    if (VG_IS_8_ALIGNED(m)) {
2973       vseg = get_mem_vseg(m);
2974    } else {
2975       vseg = nonptr_or_unknown( *(ULong*)m );
2976    }
2977    return vseg;
2978 }
2979 
2980 // This handles 32 bit loads on 32 bit targets.  It must
2981 // not be called on 64 bit targets.
2982 // return m.vseg
2983 static VG_REGPARM(2)
check_load4_P(Addr m,Seg * mptr_vseg)2984 Seg* check_load4_P(Addr m, Seg* mptr_vseg)
2985 {
2986    Seg* vseg;
2987    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
2988 #  if SC_SEGS
2989    checkSeg(mptr_vseg);
2990 #  endif
2991    check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
2992    if (VG_IS_4_ALIGNED(m)) {
2993       vseg = get_mem_vseg(m);
2994    } else {
2995       vseg = nonptr_or_unknown( *(UInt*)m );
2996    }
2997    return vseg;
2998 }
2999 
3000 // Used for both 32 bit and 64 bit targets.
3001 static VG_REGPARM(2)
check_load4(Addr m,Seg * mptr_vseg)3002 void check_load4(Addr m, Seg* mptr_vseg)
3003 {
3004 #  if SC_SEGS
3005    checkSeg(mptr_vseg);
3006 #  endif
3007    check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
3008 }
3009 
3010 // Used for both 32 bit and 64 bit targets.
3011 static VG_REGPARM(2)
check_load2(Addr m,Seg * mptr_vseg)3012 void check_load2(Addr m, Seg* mptr_vseg)
3013 {
3014 #  if SC_SEGS
3015    checkSeg(mptr_vseg);
3016 #  endif
3017    check_load_or_store(/*is_write*/False, m, 2, mptr_vseg);
3018 }
3019 
3020 // Used for both 32 bit and 64 bit targets.
3021 static VG_REGPARM(2)
check_load1(Addr m,Seg * mptr_vseg)3022 void check_load1(Addr m, Seg* mptr_vseg)
3023 {
3024 #  if SC_SEGS
3025    checkSeg(mptr_vseg);
3026 #  endif
3027    check_load_or_store(/*is_write*/False, m, 1, mptr_vseg);
3028 }
3029 
3030 // ------------------ Store handlers ------------------ //
3031 
3032 /* On 32 bit targets, we will use:
3033       check_store1 check_store2 check_store4_P check_store4C_P
3034       check_store4 (for 32-bit nonpointer stores)
3035       check_store8_ms4B_ls4B (for 64-bit stores)
3036       check_store16_ms4B_4B_4B_ls4B (for xmm/altivec stores)
3037 
3038    On 64 bit targets, we will use:
3039       check_store1 check_store2 check_store4 check_store4C
3040       check_store8_P check_store_8C_P
3041       check_store8_all8B (for 64-bit nonpointer stores)
3042       check_store16_ms8B_ls8B (for xmm/altivec stores)
3043 
3044    A "_P" handler writes a pointer to memory, and so has an extra
3045    argument -- the pointer's shadow value.  That implies that
3046    check_store4{,C}_P is only to be called on a 32 bit host and
3047    check_store8{,C}_P is only to be called on a 64 bit host.  For all
3048    other cases, and for the misaligned _P cases, the strategy is to
3049    let the store go through, and then snoop around with
3050    nonptr_or_unknown to fix up the shadow values of any affected
3051    words. */
3052 
3053 /* Helpers for store-conditionals.  Ugly kludge :-(
3054    They all return 1 if the SC was successful and 0 if it failed. */
do_store_conditional_32(Addr m,UInt t)3055 static inline UWord do_store_conditional_32( Addr m/*dst*/, UInt t/*val*/ )
3056 {
3057 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
3058    UWord success;
3059    /* If this assertion fails, the underlying IR is (semantically) ill-formed
3060       as per the IR spec for IRStmt_Store. */
3061    tl_assert(VG_IS_4_ALIGNED(m));
3062    __asm__ __volatile__(
3063       "stwcx. %2,0,%1"    "\n\t" /* data,0,addr */
3064       "mfcr   %0"         "\n\t"
3065       "srwi   %0,%0,29"   "\n\t" /* move relevant CR bit to LSB */
3066       : /*out*/"=b"(success)
3067       : /*in*/ "b"(m), "b"( (UWord)t )
3068       : /*trash*/ "memory", "cc"
3069         /* Note: srwi is OK even on 64-bit host because the we're
3070            after bit 29 (normal numbering) and we mask off all the
3071            other junk just below. */
3072    );
3073    return success & (UWord)1;
3074 #  else
3075    tl_assert(0); /* not implemented on other platforms */
3076 #  endif
3077 }
3078 
do_store_conditional_64(Addr m,ULong t)3079 static inline UWord do_store_conditional_64( Addr m/*dst*/, ULong t/*val*/ )
3080 {
3081 #  if defined(VGA_ppc64)
3082    UWord success;
3083    /* If this assertion fails, the underlying IR is (semantically) ill-formed
3084       as per the IR spec for IRStmt_Store. */
3085    tl_assert(VG_IS_8_ALIGNED(m));
3086    __asm__ __volatile__(
3087       "stdcx. %2,0,%1"    "\n\t" /* data,0,addr */
3088       "mfcr   %0"         "\n\t"
3089       "srdi   %0,%0,29"   "\n\t" /* move relevant CR bit to LSB */
3090       : /*out*/"=b"(success)
3091       : /*in*/ "b"(m), "b"( (UWord)t )
3092       : /*trash*/ "memory", "cc"
3093    );
3094    return success & (UWord)1;
3095 #  else
3096    tl_assert(0); /* not implemented on other platforms */
3097 #  endif
3098 }
3099 
3100 /* Apply nonptr_or_unknown to all the words intersecting
3101    [a, a+len). */
3102 static inline VG_REGPARM(2)
nonptr_or_unknown_range(Addr a,SizeT len)3103 void nonptr_or_unknown_range ( Addr a, SizeT len )
3104 {
3105    const SizeT wszB = sizeof(UWord);
3106    Addr wfirst = VG_ROUNDDN(a,       wszB);
3107    Addr wlast  = VG_ROUNDDN(a+len-1, wszB);
3108    Addr a2;
3109    tl_assert(wfirst <= wlast);
3110    for (a2 = wfirst ; a2 <= wlast; a2 += wszB) {
3111       set_mem_vseg( a2, nonptr_or_unknown( *(UWord*)a2 ));
3112    }
3113 }
3114 
3115 // Write to shadow memory, for a 32-bit store.  Must only
3116 // be used on 32-bit targets.
3117 static inline VG_REGPARM(2)
do_shadow_store4_P(Addr m,Seg * vseg)3118 void do_shadow_store4_P( Addr m, Seg* vseg )
3119 {
3120    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
3121    if (VG_IS_4_ALIGNED(m)) {
3122       set_mem_vseg( m, vseg );
3123    } else {
3124       // straddling two words
3125       nonptr_or_unknown_range(m, 4);
3126    }
3127 }
3128 
3129 // Write to shadow memory, for a 64-bit store.  Must only
3130 // be used on 64-bit targets.
3131 static inline VG_REGPARM(2)
do_shadow_store8_P(Addr m,Seg * vseg)3132 void do_shadow_store8_P( Addr m, Seg* vseg )
3133 {
3134    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3135    if (VG_IS_8_ALIGNED(m)) {
3136       set_mem_vseg( m, vseg );
3137    } else {
3138       // straddling two words
3139       nonptr_or_unknown_range(m, 8);
3140    }
3141 }
3142 
3143 // This handles 128 bit stores on 64 bit targets.  The
3144 // store data is passed in 2 pieces, the most significant
3145 // bits first.
3146 static VG_REGPARM(3)
check_store16_ms8B_ls8B(Addr m,Seg * mptr_vseg,UWord ms8B,UWord ls8B)3147 void check_store16_ms8B_ls8B(Addr m, Seg* mptr_vseg,
3148                              UWord ms8B, UWord ls8B)
3149 {
3150    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3151 #  if SC_SEGS
3152    checkSeg(mptr_vseg);
3153 #  endif
3154    check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
3155    // Actually *do* the STORE here
3156    if (host_is_little_endian()) {
3157       // FIXME: aren't we really concerned whether the guest
3158       // is little endian, not whether the host is?
3159       *(ULong*)(m + 0) = ls8B;
3160       *(ULong*)(m + 8) = ms8B;
3161    } else {
3162       *(ULong*)(m + 0) = ms8B;
3163       *(ULong*)(m + 8) = ls8B;
3164    }
3165    nonptr_or_unknown_range(m, 16);
3166 }
3167 
3168 // This handles 128 bit stores on 64 bit targets.  The
3169 // store data is passed in 2 pieces, the most significant
3170 // bits first.
3171 static VG_REGPARM(3)
check_store16_ms4B_4B_4B_ls4B(Addr m,Seg * mptr_vseg,UWord ms4B,UWord w2,UWord w1,UWord ls4B)3172 void check_store16_ms4B_4B_4B_ls4B(Addr m, Seg* mptr_vseg,
3173                                    UWord ms4B, UWord w2,
3174                                    UWord w1,   UWord ls4B)
3175 {
3176    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
3177 #  if SC_SEGS
3178    checkSeg(mptr_vseg);
3179 #  endif
3180    check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
3181    // Actually *do* the STORE here
3182    if (host_is_little_endian()) {
3183       // FIXME: aren't we really concerned whether the guest
3184       // is little endian, not whether the host is?
3185       *(UInt*)(m +  0) = ls4B;
3186       *(UInt*)(m +  4) = w1;
3187       *(UInt*)(m +  8) = w2;
3188       *(UInt*)(m + 12) = ms4B;
3189    } else {
3190       *(UInt*)(m +  0) = ms4B;
3191       *(UInt*)(m +  4) = w2;
3192       *(UInt*)(m +  8) = w1;
3193       *(UInt*)(m + 12) = ls4B;
3194    }
3195    nonptr_or_unknown_range(m, 16);
3196 }
3197 
3198 // This handles 64 bit stores on 32 bit targets.  The
3199 // store data is passed in 2 pieces, the most significant
3200 // bits first.
3201 static VG_REGPARM(3)
check_store8_ms4B_ls4B(Addr m,Seg * mptr_vseg,UWord ms4B,UWord ls4B)3202 void check_store8_ms4B_ls4B(Addr m, Seg* mptr_vseg,
3203                             UWord ms4B, UWord ls4B)
3204 {
3205    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
3206 #  if SC_SEGS
3207    checkSeg(mptr_vseg);
3208 #  endif
3209    check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
3210    // Actually *do* the STORE here
3211    if (host_is_little_endian()) {
3212       // FIXME: aren't we really concerned whether the guest
3213       // is little endian, not whether the host is?
3214       *(UInt*)(m + 0) = ls4B;
3215       *(UInt*)(m + 4) = ms4B;
3216    } else {
3217       *(UInt*)(m + 0) = ms4B;
3218       *(UInt*)(m + 4) = ls4B;
3219    }
3220    nonptr_or_unknown_range(m, 8);
3221 }
3222 
3223 // This handles 64 bit non pointer stores on 64 bit targets.
3224 // It must not be called on 32 bit targets.
3225 static VG_REGPARM(3)
check_store8_all8B(Addr m,Seg * mptr_vseg,UWord all8B)3226 void check_store8_all8B(Addr m, Seg* mptr_vseg, UWord all8B)
3227 {
3228    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3229 #  if SC_SEGS
3230    checkSeg(mptr_vseg);
3231 #  endif
3232    check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
3233    // Actually *do* the STORE here
3234    *(ULong*)m = all8B;
3235    nonptr_or_unknown_range(m, 8);
3236 }
3237 
3238 // This handles 64 bit stores on 64 bit targets.  It must
3239 // not be called on 32 bit targets.
3240 static VG_REGPARM(3)
check_store8_P(Addr m,Seg * mptr_vseg,UWord t,Seg * t_vseg)3241 void check_store8_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
3242 {
3243    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3244 #  if SC_SEGS
3245    checkSeg(t_vseg);
3246    checkSeg(mptr_vseg);
3247 #  endif
3248    check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
3249    // Actually *do* the STORE here
3250    *(ULong*)m = t;
3251    do_shadow_store8_P( m, t_vseg );
3252 }
3253 
3254 // This handles 64 bit store-conditionals on 64 bit targets.  It must
3255 // not be called on 32 bit targets.
3256 static VG_REGPARM(3)
check_store8C_P(Addr m,Seg * mptr_vseg,UWord t,Seg * t_vseg)3257 UWord check_store8C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
3258 {
3259    UWord success;
3260    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3261 #  if SC_SEGS
3262    checkSeg(t_vseg);
3263    checkSeg(mptr_vseg);
3264 #  endif
3265    check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
3266    // Actually *do* the STORE here
3267    success = do_store_conditional_64( m, t );
3268    if (success)
3269       do_shadow_store8_P( m, t_vseg );
3270    return success;
3271 }
3272 
3273 // This handles 32 bit stores on 32 bit targets.  It must
3274 // not be called on 64 bit targets.
3275 static VG_REGPARM(3)
check_store4_P(Addr m,Seg * mptr_vseg,UWord t,Seg * t_vseg)3276 void check_store4_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
3277 {
3278    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
3279 #  if SC_SEGS
3280    checkSeg(t_vseg);
3281    checkSeg(mptr_vseg);
3282 #  endif
3283    check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
3284    // Actually *do* the STORE here
3285    *(UInt*)m = t;
3286    do_shadow_store4_P( m, t_vseg );
3287 }
3288 
3289 // This handles 32 bit store-conditionals on 32 bit targets.  It must
3290 // not be called on 64 bit targets.
3291 static VG_REGPARM(3)
check_store4C_P(Addr m,Seg * mptr_vseg,UWord t,Seg * t_vseg)3292 UWord check_store4C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
3293 {
3294    UWord success;
3295    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
3296 #  if SC_SEGS
3297    checkSeg(t_vseg);
3298    checkSeg(mptr_vseg);
3299 #  endif
3300    check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
3301    // Actually *do* the STORE here
3302    success = do_store_conditional_32( m, t );
3303    if (success)
3304       do_shadow_store4_P( m, t_vseg );
3305    return success;
3306 }
3307 
3308 // Used for both 32 bit and 64 bit targets.
3309 static VG_REGPARM(3)
check_store4(Addr m,Seg * mptr_vseg,UWord t)3310 void check_store4(Addr m, Seg* mptr_vseg, UWord t)
3311 {
3312 #  if SC_SEGS
3313    checkSeg(mptr_vseg);
3314 #  endif
3315    check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
3316    // Actually *do* the STORE here  (Nb: cast must be to 4-byte type!)
3317    *(UInt*)m = t;
3318    nonptr_or_unknown_range(m, 4);
3319 }
3320 
3321 // Used for 32-bit store-conditionals on 64 bit targets only.  It must
3322 // not be called on 32 bit targets.
3323 static VG_REGPARM(3)
check_store4C(Addr m,Seg * mptr_vseg,UWord t)3324 UWord check_store4C(Addr m, Seg* mptr_vseg, UWord t)
3325 {
3326    UWord success;
3327    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
3328 #  if SC_SEGS
3329    checkSeg(mptr_vseg);
3330 #  endif
3331    check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
3332    // Actually *do* the STORE here
3333    success = do_store_conditional_32( m, t );
3334    if (success)
3335       nonptr_or_unknown_range(m, 4);
3336    return success;
3337 }
3338 
3339 // Used for both 32 bit and 64 bit targets.
3340 static VG_REGPARM(3)
check_store2(Addr m,Seg * mptr_vseg,UWord t)3341 void check_store2(Addr m, Seg* mptr_vseg, UWord t)
3342 {
3343 #  if SC_SEGS
3344    checkSeg(mptr_vseg);
3345 #  endif
3346    check_load_or_store(/*is_write*/True, m, 2, mptr_vseg);
3347    // Actually *do* the STORE here  (Nb: cast must be to 2-byte type!)
3348    *(UShort*)m = t;
3349    nonptr_or_unknown_range(m, 2);
3350 }
3351 
3352 // Used for both 32 bit and 64 bit targets.
3353 static VG_REGPARM(3)
check_store1(Addr m,Seg * mptr_vseg,UWord t)3354 void check_store1(Addr m, Seg* mptr_vseg, UWord t)
3355 {
3356 #  if SC_SEGS
3357    checkSeg(mptr_vseg);
3358 #  endif
3359    check_load_or_store(/*is_write*/True, m, 1, mptr_vseg);
3360    // Actually *do* the STORE here  (Nb: cast must be to 1-byte type!)
3361    *(UChar*)m = t;
3362    nonptr_or_unknown_range(m, 1);
3363 }
3364 
3365 
3366 // Nb: if the result is BOTTOM, return immedately -- don't let BOTTOM
3367 //     be changed to NONPTR by a range check on the result.
3368 #define BINOP(bt, nn, nu, np, un, uu, up, pn, pu, pp) \
3369    if (BOTTOM == seg1 || BOTTOM == seg2) { bt;                   \
3370    } else if (NONPTR == seg1)  { if      (NONPTR == seg2)  { nn; }  \
3371                                  else if (UNKNOWN == seg2) { nu; }    \
3372                                  else                      { np; }    \
3373    } else if (UNKNOWN == seg1) { if      (NONPTR == seg2)  { un; }    \
3374                                  else if (UNKNOWN == seg2) { uu; }    \
3375                                  else                      { up; }    \
3376    } else                      { if      (NONPTR == seg2)  { pn; }    \
3377                                  else if (UNKNOWN == seg2) { pu; }    \
3378                                  else                      { pp; }    \
3379    }
3380 
3381 #define BINERROR(opname)                    \
3382    h_record_arith_error(seg1, seg2, opname);  \
3383    out = NONPTR
3384 
3385 
3386 // -------------
3387 //  + | n  ?  p
3388 // -------------
3389 //  n | n  ?  p
3390 //  ? | ?  ?  ?
3391 //  p | p  ?  e   (all results become n if they look like a non-pointer)
3392 // -------------
do_addW_result(Seg * seg1,Seg * seg2,UWord result,HChar * opname)3393 static Seg* do_addW_result(Seg* seg1, Seg* seg2, UWord result, HChar* opname)
3394 {
3395    Seg* out;
3396 #  if SC_SEGS
3397    checkSeg(seg1);
3398    checkSeg(seg2);
3399 #  endif
3400    BINOP(
3401       return BOTTOM,
3402       out = NONPTR,  out = UNKNOWN, out = seg2,
3403       out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
3404       out = seg1,    out = UNKNOWN,       BINERROR(opname)
3405    );
3406    return ( looks_like_a_pointer(result) ? out : NONPTR );
3407 }
3408 
do_addW(Seg * seg1,Seg * seg2,UWord result)3409 static VG_REGPARM(3) Seg* do_addW(Seg* seg1, Seg* seg2, UWord result)
3410 {
3411    Seg* out;
3412 #  if SC_SEGS
3413    checkSeg(seg1);
3414    checkSeg(seg2);
3415 #  endif
3416    out = do_addW_result(seg1, seg2, result, "Add32/Add64");
3417 #  if SC_SEGS
3418    checkSeg(out);
3419 #  endif
3420    return out;
3421 }
3422 
3423 // -------------
3424 //  - | n  ?  p      (Nb: operation is seg1 - seg2)
3425 // -------------
3426 //  n | n  ?  n+     (+) happens a lot due to "cmp", but result should never
3427 //  ? | ?  ?  n/B        be used, so give 'n'
3428 //  p | p  p? n*/B   (*) and possibly link the segments
3429 // -------------
do_subW(Seg * seg1,Seg * seg2,UWord result)3430 static VG_REGPARM(3) Seg* do_subW(Seg* seg1, Seg* seg2, UWord result)
3431 {
3432    Seg* out;
3433 #  if SC_SEGS
3434    checkSeg(seg1);
3435    checkSeg(seg2);
3436 #  endif
3437    // Nb: when returning BOTTOM, don't let it go through the range-check;
3438    //     a segment linking offset can easily look like a nonptr.
3439    BINOP(
3440       return BOTTOM,
3441       out = NONPTR,  out = UNKNOWN,    out = NONPTR,
3442       out = UNKNOWN, out = UNKNOWN,    return BOTTOM,
3443       out = seg1,    out = seg1/*??*/, return BOTTOM
3444    );
3445    #if 0
3446          // This is for the p-p segment-linking case
3447          Seg end2 = seg2;
3448          while (end2->links != seg2) end2 = end2->links;
3449          end2->links = seg1->links;
3450          seg1->links = seg2;
3451          return NONPTR;
3452    #endif
3453    return ( looks_like_a_pointer(result) ? out : NONPTR );
3454 }
3455 
3456 // -------------
3457 //  & | n  ?  p
3458 // -------------
3459 //  n | n  ?  p
3460 //  ? | ?  ?  ?
3461 //  p | p  ?  *  (*) if p1==p2 then p else e (see comment)
3462 // -------------
3463 /* Seems to be OK to And two pointers:
3464      testq %ptr1,%ptr2
3465      jnz ..
3466    which possibly derives from
3467      if (ptr1 & ptr2) { A } else { B }
3468    not sure what that means
3469 */
do_andW(Seg * seg1,Seg * seg2,UWord result,UWord args_diff)3470 static VG_REGPARM(3) Seg* do_andW(Seg* seg1, Seg* seg2,
3471                                   UWord result, UWord args_diff)
3472 {
3473    Seg* out;
3474    if (0 == args_diff) {
3475       // p1==p2
3476       out = seg1;
3477    } else {
3478       BINOP(
3479          return BOTTOM,
3480          out = NONPTR,  out = UNKNOWN, out = seg2,
3481          out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
3482          out = seg1,    out = UNKNOWN, out = NONPTR
3483                                        /*BINERROR("And32/And64")*/
3484       );
3485    }
3486    out = ( looks_like_a_pointer(result) ? out : NONPTR );
3487    return out;
3488 }
3489 
3490 // -------------
3491 // `|`| n  ?  p
3492 // -------------
3493 //  n | n  ?  p
3494 //  ? | ?  ?  ?
3495 //  p | p  ?  n
3496 // -------------
3497 /* It's OK to Or two pointers together, but the result definitely
3498    isn't a pointer.  Why would you want to do that?  Because of this:
3499      char* p1 = malloc(..);
3500      char* p2 = malloc(..);
3501      ...
3502      if (p1 || p2) { .. }
3503    In this case gcc on x86/amd64 quite literally or-s the two pointers
3504    together and throws away the result, the purpose of which is merely
3505    to sets %eflags.Z/%rflags.Z.  So we have to allow it.
3506 */
do_orW(Seg * seg1,Seg * seg2,UWord result)3507 static VG_REGPARM(3) Seg* do_orW(Seg* seg1, Seg* seg2, UWord result)
3508 {
3509    Seg* out;
3510    BINOP(
3511       return BOTTOM,
3512       out = NONPTR,  out = UNKNOWN, out = seg2,
3513       out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
3514       out = seg1,    out = UNKNOWN, out = NONPTR
3515    );
3516    out = ( looks_like_a_pointer(result) ? out : NONPTR );
3517    return out;
3518 }
3519 
3520 // -------------
3521 //  ~ | n  ?  p
3522 // -------------
3523 //    | n  n  n
3524 // -------------
do_notW(Seg * seg1,UWord result)3525 static VG_REGPARM(2) Seg* do_notW(Seg* seg1, UWord result)
3526 {
3527 #  if SC_SEGS
3528    checkSeg(seg1);
3529 #  endif
3530    if (BOTTOM == seg1) return BOTTOM;
3531    return NONPTR;
3532 }
3533 
3534 // Pointers are rarely multiplied, but sometimes legitimately, eg. as hash
3535 // function inputs.  But two pointers args --> error.
3536 // Pretend it always returns a nonptr.  Maybe improve later.
do_mulW(Seg * seg1,Seg * seg2)3537 static VG_REGPARM(2) Seg* do_mulW(Seg* seg1, Seg* seg2)
3538 {
3539 #  if SC_SEGS
3540    checkSeg(seg1);
3541    checkSeg(seg2);
3542 #  endif
3543    if (is_known_segment(seg1) && is_known_segment(seg2))
3544       h_record_arith_error(seg1, seg2, "Mul32/Mul64");
3545    return NONPTR;
3546 }
3547 
3548 
3549 /*--------------------------------------------------------------------*/
3550 /*--- Instrumentation                                              ---*/
3551 /*--------------------------------------------------------------------*/
3552 
3553 /* The h_ instrumenter that follows is complex, since it deals with
3554    shadow value computation.
3555 
3556    It also needs to generate instrumentation for the sg_ side of
3557    things.  That's relatively straightforward.  However, rather than
3558    confuse the code herein any further, we simply delegate the problem
3559    to sg_main.c, by using the four functions
3560    sg_instrument_{init,fini,IRStmt,final_jump}.  These four completely
3561    abstractify the sg_ instrumentation.  See comments in sg_main.c's
3562    instrumentation section for further details. */
3563 
3564 
3565 /* Carries info about a particular tmp.  The tmp's number is not
3566    recorded, as this is implied by (equal to) its index in the tmpMap
3567    in PCEnv.  The tmp's type is also not recorded, as this is present
3568    in PCEnv.sb->tyenv.
3569 
3570    When .kind is NonShad, .shadow may give the identity of the temp
3571    currently holding the associated shadow value, or it may be
3572    IRTemp_INVALID if code to compute the shadow has not yet been
3573    emitted.
3574 
3575    When .kind is Shad tmp holds a shadow value, and so .shadow must be
3576    IRTemp_INVALID, since it is illogical for a shadow tmp itself to be
3577    shadowed.
3578 */
3579 typedef
3580    enum { NonShad=1, Shad=2 }
3581    TempKind;
3582 
3583 typedef
3584    struct {
3585       TempKind kind;
3586       IRTemp   shadow;
3587    }
3588    TempMapEnt;
3589 
3590 
3591 
3592 /* Carries around state during Ptrcheck instrumentation. */
3593 typedef
3594    struct {
3595       /* MODIFIED: the superblock being constructed.  IRStmts are
3596          added. */
3597       IRSB* sb;
3598       Bool  trace;
3599 
3600       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
3601          current kind and possibly shadow temps for each temp in the
3602          IRSB being constructed.  Note that it does not contain the
3603          type of each tmp.  If you want to know the type, look at the
3604          relevant entry in sb->tyenv.  It follows that at all times
3605          during the instrumentation process, the valid indices for
3606          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
3607          total number of NonShad and Shad temps allocated so far.
3608 
3609          The reason for this strange split (types in one place, all
3610          other info in another) is that we need the types to be
3611          attached to sb so as to make it possible to do
3612          "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
3613          instrumentation process.
3614 
3615          Note that only integer temps of the guest word size are
3616          shadowed, since it is impossible (or meaningless) to hold a
3617          pointer in any other type of temp. */
3618       XArray* /* of TempMapEnt */ qmpMap;
3619 
3620       /* READONLY: the host word type.  Needed for constructing
3621          arguments of type 'HWord' to be passed to helper functions.
3622          Ity_I32 or Ity_I64 only. */
3623       IRType hWordTy;
3624 
3625       /* READONLY: the guest word type, Ity_I32 or Ity_I64 only. */
3626       IRType gWordTy;
3627 
3628       /* READONLY: the guest state size, so we can generate shadow
3629          offsets correctly. */
3630       Int guest_state_sizeB;
3631    }
3632    PCEnv;
3633 
3634 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
3635    demand), as they are encountered.  This is for two reasons.
3636 
3637    (1) (less important reason): Many original tmps are unused due to
3638    initial IR optimisation, and we do not want to spaces in tables
3639    tracking them.
3640 
3641    Shadow IRTemps are therefore allocated on demand.  pce.tmpMap is a
3642    table indexed [0 .. n_types-1], which gives the current shadow for
3643    each original tmp, or INVALID_IRTEMP if none is so far assigned.
3644    It is necessary to support making multiple assignments to a shadow
3645    -- specifically, after testing a shadow for definedness, it needs
3646    to be made defined.  But IR's SSA property disallows this.
3647 
3648    (2) (more important reason): Therefore, when a shadow needs to get
3649    a new value, a new temporary is created, the value is assigned to
3650    that, and the tmpMap is updated to reflect the new binding.
3651 
3652    A corollary is that if the tmpMap maps a given tmp to
3653    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
3654    there's a read-before-write error in the original tmps.  The IR
3655    sanity checker should catch all such anomalies, however.
3656 */
3657 
3658 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
3659    both the table in pce->sb and to our auxiliary mapping.  Note that
3660    newTemp may cause pce->tmpMap to resize, hence previous results
3661    from VG_(indexXA)(pce->tmpMap) are invalidated. */
newTemp(PCEnv * pce,IRType ty,TempKind kind)3662 static IRTemp newTemp ( PCEnv* pce, IRType ty, TempKind kind )
3663 {
3664    Word       newIx;
3665    TempMapEnt ent;
3666    IRTemp     tmp = newIRTemp(pce->sb->tyenv, ty);
3667    ent.kind   = kind;
3668    ent.shadow = IRTemp_INVALID;
3669    newIx = VG_(addToXA)( pce->qmpMap, &ent );
3670    tl_assert(newIx == (Word)tmp);
3671    return tmp;
3672 }
3673 
3674 /* Find the tmp currently shadowing the given original tmp.  If none
3675    so far exists, allocate one.  */
findShadowTmp(PCEnv * pce,IRTemp orig)3676 static IRTemp findShadowTmp ( PCEnv* pce, IRTemp orig )
3677 {
3678    TempMapEnt* ent;
3679    /* VG_(indexXA) range-checks 'orig', hence no need to check
3680       here. */
3681    ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
3682    tl_assert(ent->kind == NonShad);
3683    if (ent->shadow == IRTemp_INVALID) {
3684       IRTemp shadow = newTemp( pce, pce->gWordTy, Shad );
3685       /* newTemp may cause pce->tmpMap to resize, hence previous results
3686          from VG_(indexXA) are invalid. */
3687       ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
3688       tl_assert(ent->kind == NonShad);
3689       tl_assert(ent->shadow == IRTemp_INVALID);
3690       ent->shadow = shadow;
3691    }
3692    return ent->shadow;
3693 }
3694 
3695 /* Allocate a new shadow for the given original tmp.  This means any
3696    previous shadow is abandoned.  This is needed because it is
3697    necessary to give a new value to a shadow once it has been tested
3698    for undefinedness, but unfortunately IR's SSA property disallows
3699    this.  Instead we must abandon the old shadow, allocate a new one
3700    and use that instead.
3701 
3702    This is the same as findShadowTmp, except we don't bother to see
3703    if a shadow temp already existed -- we simply allocate a new one
3704    regardless. */
newShadowTmp(PCEnv * pce,IRTemp orig)3705 static IRTemp newShadowTmp ( PCEnv* pce, IRTemp orig )
3706 {
3707    TempMapEnt* ent;
3708    /* VG_(indexXA) range-checks 'orig', hence no need to check
3709       here. */
3710    ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
3711    tl_assert(ent->kind == NonShad);
3712    if (1) {
3713       IRTemp shadow = newTemp( pce, pce->gWordTy, Shad );
3714       /* newTemp may cause pce->tmpMap to resize, hence previous results
3715          from VG_(indexXA) are invalid. */
3716       ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
3717       tl_assert(ent->kind == NonShad);
3718       ent->shadow = shadow;
3719       return shadow;
3720    }
3721    /* NOTREACHED */
3722    tl_assert(0);
3723 }
3724 
3725 
3726 /*------------------------------------------------------------*/
3727 /*--- IRAtoms -- a subset of IRExprs                       ---*/
3728 /*------------------------------------------------------------*/
3729 
3730 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
3731    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
3732    input, most of this code deals in atoms.  Usefully, a value atom
3733    always has a V-value which is also an atom: constants are shadowed
3734    by constants, and temps are shadowed by the corresponding shadow
3735    temporary. */
3736 
3737 typedef  IRExpr  IRAtom;
3738 
3739 //zz /* (used for sanity checks only): is this an atom which looks
3740 //zz    like it's from original code? */
3741 //zz static Bool isOriginalAtom ( PCEnv* pce, IRAtom* a1 )
3742 //zz {
3743 //zz    if (a1->tag == Iex_Const)
3744 //zz       return True;
3745 //zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < pce->n_originalTmps)
3746 //zz       return True;
3747 //zz    return False;
3748 //zz }
3749 //zz
3750 //zz /* (used for sanity checks only): is this an atom which looks
3751 //zz    like it's from shadow code? */
3752 //zz static Bool isShadowAtom ( PCEnv* pce, IRAtom* a1 )
3753 //zz {
3754 //zz    if (a1->tag == Iex_Const)
3755 //zz       return True;
3756 //zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= pce->n_originalTmps)
3757 //zz       return True;
3758 //zz    return False;
3759 //zz }
3760 //zz
3761 //zz /* (used for sanity checks only): check that both args are atoms and
3762 //zz    are identically-kinded. */
3763 //zz static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
3764 //zz {
3765 //zz    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
3766 //zz       return True;
3767 //zz    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
3768 //zz       return True;
3769 //zz    return False;
3770 //zz }
3771 
3772 
3773 /*------------------------------------------------------------*/
3774 /*--- Constructing IR fragments                            ---*/
3775 /*------------------------------------------------------------*/
3776 
3777 /* add stmt to a bb */
stmt(HChar cat,PCEnv * pce,IRStmt * st)3778 static inline void stmt ( HChar cat, PCEnv* pce, IRStmt* st ) {
3779    if (pce->trace) {
3780       VG_(printf)("  %c: ", cat);
3781       ppIRStmt(st);
3782       VG_(printf)("\n");
3783    }
3784    addStmtToIRSB(pce->sb, st);
3785 }
3786 
3787 /* assign value to tmp */
3788 static inline
assign(HChar cat,PCEnv * pce,IRTemp tmp,IRExpr * expr)3789 void assign ( HChar cat, PCEnv* pce, IRTemp tmp, IRExpr* expr ) {
3790    stmt(cat, pce, IRStmt_WrTmp(tmp,expr));
3791 }
3792 
3793 /* build various kinds of expressions */
3794 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
3795 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
3796 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
3797 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
3798 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
3799 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
3800 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
3801 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
3802 
3803 /* Bind the given expression to a new temporary, and return the
3804    temporary.  This effectively converts an arbitrary expression into
3805    an atom.
3806 
3807    'ty' is the type of 'e' and hence the type that the new temporary
3808    needs to be.  But passing it is redundant, since we can deduce the
3809    type merely by inspecting 'e'.  So at least that fact to assert
3810    that the two types agree. */
assignNew(HChar cat,PCEnv * pce,IRType ty,IRExpr * e)3811 static IRAtom* assignNew ( HChar cat, PCEnv* pce, IRType ty, IRExpr* e ) {
3812    IRTemp t;
3813    IRType tyE = typeOfIRExpr(pce->sb->tyenv, e);
3814    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
3815    t = newTemp(pce, ty, Shad);
3816    assign(cat, pce, t, e);
3817    return mkexpr(t);
3818 }
3819 
3820 
3821 
3822 //-----------------------------------------------------------------------
3823 // Approach taken for range-checking for NONPTR/UNKNOWN-ness as follows.
3824 //
3825 // Range check (NONPTR/seg):
3826 // - after modifying a word-sized value in/into a TempReg:
3827 //    - {ADD, SUB, ADC, SBB, AND, OR, XOR, LEA, LEA2, NEG, NOT}L
3828 //    - BSWAP
3829 //
3830 // Range check (NONPTR/UNKNOWN):
3831 // - when introducing a new word-sized value into a TempReg:
3832 //    - MOVL l, t2
3833 //
3834 // - when copying a word-sized value which lacks a corresponding segment
3835 //   into a TempReg:
3836 //    - straddled LDL
3837 //
3838 // - when a sub-word of a word (or two) is updated:
3839 //    - SHROTL
3840 //    - {ADD, SUB, ADC, SBB, AND, OR, XOR, SHROT, NEG, NOT}[WB]
3841 //    - PUT[WB]
3842 //    - straddled   STL (2 range checks)
3843 //    - straddled   STW (2 range checks)
3844 //    - unstraddled STW
3845 //    - STB
3846 //
3847 // Just copy:
3848 // - when copying word-sized values:
3849 //    - MOVL t1, t2 (--optimise=no only)
3850 //    - CMOV
3851 //    - GETL, PUTL
3852 //    - unstraddled LDL, unstraddled STL
3853 //
3854 // - when barely changing
3855 //    - INC[LWB]/DEC[LWB]
3856 //
3857 // Set to NONPTR:
3858 // - after copying a sub-word value into a TempReg:
3859 //    - MOV[WB] l, t2
3860 //    - GET[WB]
3861 //    - unstraddled LDW
3862 //    - straddled   LDW
3863 //    - LDB
3864 //    - POP[WB]
3865 //
3866 // - after copying an obvious non-ptr into a TempReg:
3867 //    - GETF
3868 //    - CC2VAL
3869 //    - POPL
3870 //
3871 // - after copying an obvious non-ptr into a memory word:
3872 //    - FPU_W
3873 //
3874 // Do nothing:
3875 // - LOCK, INCEIP
3876 // - WIDEN[WB]
3877 // - JMP, JIFZ
3878 // - CALLM_[SE], PUSHL, CALLM, CLEAR
3879 // - FPU, FPU_R (and similar MMX/SSE ones)
3880 //
3881 
3882 
3883 
3884 
3885 /* Call h_fn (name h_nm) with the given arg, and return a new IRTemp
3886    holding the result.  The arg must be a word-typed atom.  Callee
3887    must be a VG_REGPARM(1) function. */
3888 __attribute__((noinline))
gen_dirty_W_W(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1)3889 static IRTemp gen_dirty_W_W ( PCEnv* pce, void* h_fn, HChar* h_nm,
3890                               IRExpr* a1 )
3891 {
3892    IRTemp   res;
3893    IRDirty* di;
3894    tl_assert(isIRAtom(a1));
3895    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3896    res = newTemp(pce, pce->gWordTy, Shad);
3897    di = unsafeIRDirty_1_N( res, 1/*regparms*/,
3898                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
3899                            mkIRExprVec_1( a1 ) );
3900    stmt( 'I', pce, IRStmt_Dirty(di) );
3901    return res;
3902 }
3903 
3904 /* Two-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(2)
3905    function.*/
gen_dirty_W_WW(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2)3906 static IRTemp gen_dirty_W_WW ( PCEnv* pce, void* h_fn, HChar* h_nm,
3907                                IRExpr* a1, IRExpr* a2 )
3908 {
3909    IRTemp   res;
3910    IRDirty* di;
3911    tl_assert(isIRAtom(a1));
3912    tl_assert(isIRAtom(a2));
3913    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3914    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
3915    res = newTemp(pce, pce->gWordTy, Shad);
3916    di = unsafeIRDirty_1_N( res, 2/*regparms*/,
3917                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
3918                            mkIRExprVec_2( a1, a2 ) );
3919    stmt( 'I', pce, IRStmt_Dirty(di) );
3920    return res;
3921 }
3922 
3923 /* Three-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
3924    function.*/
gen_dirty_W_WWW(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2,IRExpr * a3)3925 static IRTemp gen_dirty_W_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
3926                                 IRExpr* a1, IRExpr* a2, IRExpr* a3 )
3927 {
3928    IRTemp   res;
3929    IRDirty* di;
3930    tl_assert(isIRAtom(a1));
3931    tl_assert(isIRAtom(a2));
3932    tl_assert(isIRAtom(a3));
3933    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3934    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
3935    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
3936    res = newTemp(pce, pce->gWordTy, Shad);
3937    di = unsafeIRDirty_1_N( res, 3/*regparms*/,
3938                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
3939                            mkIRExprVec_3( a1, a2, a3 ) );
3940    stmt( 'I', pce, IRStmt_Dirty(di) );
3941    return res;
3942 }
3943 
3944 /* Four-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
3945    function.*/
gen_dirty_W_WWWW(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2,IRExpr * a3,IRExpr * a4)3946 static IRTemp gen_dirty_W_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
3947                                  IRExpr* a1, IRExpr* a2,
3948                                  IRExpr* a3, IRExpr* a4 )
3949 {
3950    IRTemp   res;
3951    IRDirty* di;
3952    tl_assert(isIRAtom(a1));
3953    tl_assert(isIRAtom(a2));
3954    tl_assert(isIRAtom(a3));
3955    tl_assert(isIRAtom(a4));
3956    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3957    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
3958    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
3959    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
3960    res = newTemp(pce, pce->gWordTy, Shad);
3961    di = unsafeIRDirty_1_N( res, 3/*regparms*/,
3962                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
3963                            mkIRExprVec_4( a1, a2, a3, a4 ) );
3964    stmt( 'I', pce, IRStmt_Dirty(di) );
3965    return res;
3966 }
3967 
3968 /* Version of gen_dirty_W_WW with no return value.  Callee must be a
3969    VG_REGPARM(2) function.  If guard is non-NULL then it is used to
3970    conditionalise the call. */
gen_dirty_v_WW(PCEnv * pce,IRExpr * guard,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2)3971 static void gen_dirty_v_WW ( PCEnv* pce, IRExpr* guard,
3972                              void* h_fn, HChar* h_nm,
3973                              IRExpr* a1, IRExpr* a2 )
3974 {
3975    IRDirty* di;
3976    tl_assert(isIRAtom(a1));
3977    tl_assert(isIRAtom(a2));
3978    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3979    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
3980    di = unsafeIRDirty_0_N( 2/*regparms*/,
3981                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
3982                            mkIRExprVec_2( a1, a2 ) );
3983    if (guard)
3984       di->guard = guard;
3985    stmt( 'I', pce, IRStmt_Dirty(di) );
3986 }
3987 
3988 /* Version of gen_dirty_W_WWW with no return value.  Callee must be a
3989    VG_REGPARM(3) function.*/
gen_dirty_v_WWW(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2,IRExpr * a3)3990 static void gen_dirty_v_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
3991                               IRExpr* a1, IRExpr* a2, IRExpr* a3 )
3992 {
3993    IRDirty* di;
3994    tl_assert(isIRAtom(a1));
3995    tl_assert(isIRAtom(a2));
3996    tl_assert(isIRAtom(a3));
3997    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
3998    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
3999    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
4000    di = unsafeIRDirty_0_N( 3/*regparms*/,
4001                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
4002                            mkIRExprVec_3( a1, a2, a3 ) );
4003    stmt( 'I', pce, IRStmt_Dirty(di) );
4004 }
4005 
4006 /* Version of gen_dirty_v_WWW for 4 arguments.  Callee must be a
4007    VG_REGPARM(3) function.*/
gen_dirty_v_WWWW(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2,IRExpr * a3,IRExpr * a4)4008 static void gen_dirty_v_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
4009                                IRExpr* a1, IRExpr* a2,
4010                                IRExpr* a3, IRExpr* a4 )
4011 {
4012    IRDirty* di;
4013    tl_assert(isIRAtom(a1));
4014    tl_assert(isIRAtom(a2));
4015    tl_assert(isIRAtom(a3));
4016    tl_assert(isIRAtom(a4));
4017    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
4018    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
4019    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
4020    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
4021    di = unsafeIRDirty_0_N( 3/*regparms*/,
4022                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
4023                            mkIRExprVec_4( a1, a2, a3, a4 ) );
4024    stmt( 'I', pce, IRStmt_Dirty(di) );
4025 }
4026 
4027 /* Version of gen_dirty_v_WWW for 6 arguments.  Callee must be a
4028    VG_REGPARM(3) function.*/
gen_dirty_v_6W(PCEnv * pce,void * h_fn,HChar * h_nm,IRExpr * a1,IRExpr * a2,IRExpr * a3,IRExpr * a4,IRExpr * a5,IRExpr * a6)4029 static void gen_dirty_v_6W ( PCEnv* pce, void* h_fn, HChar* h_nm,
4030                              IRExpr* a1, IRExpr* a2, IRExpr* a3,
4031                              IRExpr* a4, IRExpr* a5, IRExpr* a6 )
4032 {
4033    IRDirty* di;
4034    tl_assert(isIRAtom(a1));
4035    tl_assert(isIRAtom(a2));
4036    tl_assert(isIRAtom(a3));
4037    tl_assert(isIRAtom(a4));
4038    tl_assert(isIRAtom(a5));
4039    tl_assert(isIRAtom(a6));
4040    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
4041    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
4042    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
4043    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
4044    tl_assert(typeOfIRExpr(pce->sb->tyenv, a5) == pce->gWordTy);
4045    tl_assert(typeOfIRExpr(pce->sb->tyenv, a6) == pce->gWordTy);
4046    di = unsafeIRDirty_0_N( 3/*regparms*/,
4047                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
4048                            mkIRExprVec_6( a1, a2, a3, a4, a5, a6 ) );
4049    stmt( 'I', pce, IRStmt_Dirty(di) );
4050 }
4051 
uwiden_to_host_word(PCEnv * pce,IRAtom * a)4052 static IRAtom* uwiden_to_host_word ( PCEnv* pce, IRAtom* a )
4053 {
4054    IRType a_ty = typeOfIRExpr(pce->sb->tyenv, a);
4055    tl_assert(isIRAtom(a));
4056    if (pce->hWordTy == Ity_I32) {
4057       switch (a_ty) {
4058          case Ity_I8:
4059             return assignNew( 'I', pce, Ity_I32, unop(Iop_8Uto32, a) );
4060          case Ity_I16:
4061             return assignNew( 'I', pce, Ity_I32, unop(Iop_16Uto32, a) );
4062          default:
4063             ppIRType(a_ty);
4064             tl_assert(0);
4065       }
4066    } else {
4067       tl_assert(pce->hWordTy == Ity_I64);
4068       switch (a_ty) {
4069          case Ity_I8:
4070             return assignNew( 'I', pce, Ity_I64, unop(Iop_8Uto64, a) );
4071          case Ity_I16:
4072             return assignNew( 'I', pce, Ity_I64, unop(Iop_16Uto64, a) );
4073          case Ity_I32:
4074             return assignNew( 'I', pce, Ity_I64, unop(Iop_32Uto64, a) );
4075          default:
4076             ppIRType(a_ty);
4077             tl_assert(0);
4078       }
4079    }
4080 }
4081 
4082 /* 'e' is a word-sized atom.  Call nonptr_or_unknown with it, bind the
4083    results to a new temporary, and return the temporary.  Note this
4084    takes an original expression but returns a shadow value. */
gen_call_nonptr_or_unknown_w(PCEnv * pce,IRExpr * e)4085 static IRTemp gen_call_nonptr_or_unknown_w ( PCEnv* pce, IRExpr* e )
4086 {
4087    return gen_dirty_W_W( pce, &nonptr_or_unknown,
4088                               "nonptr_or_unknown", e );
4089 }
4090 
4091 
4092 /* Generate the shadow value for an IRExpr which is an atom and
4093    guaranteed to be word-sized. */
schemeEw_Atom(PCEnv * pce,IRExpr * e)4094 static IRAtom* schemeEw_Atom ( PCEnv* pce, IRExpr* e )
4095 {
4096    if (pce->gWordTy == Ity_I32) {
4097       if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U32) {
4098          IRTemp t;
4099          tl_assert(sizeof(UWord) == 4);
4100          t = gen_call_nonptr_or_unknown_w(pce, e);
4101          return mkexpr(t);
4102       }
4103       if (e->tag == Iex_RdTmp
4104           && typeOfIRExpr(pce->sb->tyenv, e) == Ity_I32) {
4105          return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
4106       }
4107       /* there are no other word-sized atom cases */
4108    } else {
4109       if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U64) {
4110          IRTemp t;
4111          tl_assert(sizeof(UWord) == 8);
4112          //return mkU64( (ULong)(UWord)NONPTR );
4113          t = gen_call_nonptr_or_unknown_w(pce, e);
4114          return mkexpr(t);
4115       }
4116       if (e->tag == Iex_RdTmp
4117           && typeOfIRExpr(pce->sb->tyenv, e) == Ity_I64) {
4118          return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
4119       }
4120       /* there are no other word-sized atom cases */
4121    }
4122    ppIRExpr(e);
4123    tl_assert(0);
4124 }
4125 
4126 
4127 static
instrument_arithop(PCEnv * pce,IRTemp dst,IRTemp dstv,IROp op,IRExpr * a1,IRExpr * a2,IRExpr * a3,IRExpr * a4)4128 void instrument_arithop ( PCEnv* pce,
4129                           IRTemp dst, /* already holds result */
4130                           IRTemp dstv, /* generate an assignment to this */
4131                           IROp op,
4132                           /* original args, guaranteed to be atoms */
4133                           IRExpr* a1, IRExpr* a2, IRExpr* a3, IRExpr* a4 )
4134 {
4135    HChar*  nm  = NULL;
4136    void*   fn  = NULL;
4137    IRExpr* a1v = NULL;
4138    IRExpr* a2v = NULL;
4139    //IRExpr* a3v = NULL;
4140    //IRExpr* a4v = NULL;
4141    IRTemp  res = IRTemp_INVALID;
4142 
4143    if (pce->gWordTy == Ity_I32) {
4144 
4145       tl_assert(pce->hWordTy == Ity_I32);
4146       switch (op) {
4147 
4148          /* For these cases, pass Segs for both arguments, and the
4149             result value. */
4150          case Iop_Add32: nm = "do_addW"; fn = &do_addW; goto ssr32;
4151          case Iop_Sub32: nm = "do_subW"; fn = &do_subW; goto ssr32;
4152          case Iop_Or32:  nm = "do_orW";  fn = &do_orW;  goto ssr32;
4153          ssr32:
4154             a1v = schemeEw_Atom( pce, a1 );
4155             a2v = schemeEw_Atom( pce, a2 );
4156             res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
4157             assign( 'I', pce, dstv, mkexpr(res) );
4158             break;
4159 
4160          /* In this case, pass Segs for both arguments, the result
4161             value, and the difference between the (original) values of
4162             the arguments. */
4163          case Iop_And32:
4164             nm = "do_andW"; fn = &do_andW;
4165             a1v = schemeEw_Atom( pce, a1 );
4166             a2v = schemeEw_Atom( pce, a2 );
4167             res = gen_dirty_W_WWWW(
4168                      pce, fn, nm, a1v, a2v, mkexpr(dst),
4169                      assignNew( 'I', pce, Ity_I32,
4170                                 binop(Iop_Sub32,a1,a2) ) );
4171             assign( 'I', pce, dstv, mkexpr(res) );
4172             break;
4173 
4174          /* Pass one shadow arg and the result to the helper. */
4175          case Iop_Not32: nm = "do_notW"; fn = &do_notW; goto vr32;
4176          vr32:
4177             a1v = schemeEw_Atom( pce, a1 );
4178             res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
4179             assign( 'I', pce, dstv, mkexpr(res) );
4180             break;
4181 
4182          /* Pass two shadow args only to the helper. */
4183          case Iop_Mul32: nm = "do_mulW"; fn = &do_mulW; goto vv32;
4184          vv32:
4185             a1v = schemeEw_Atom( pce, a1 );
4186             a2v = schemeEw_Atom( pce, a2 );
4187             res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
4188             assign( 'I', pce, dstv, mkexpr(res) );
4189             break;
4190 
4191          /* We don't really know what the result could be; test at run
4192             time. */
4193          case Iop_64HIto32: goto n_or_u_32;
4194          case Iop_64to32:   goto n_or_u_32;
4195          case Iop_Xor32:    goto n_or_u_32;
4196          n_or_u_32:
4197             assign( 'I', pce, dstv,
4198                     mkexpr(
4199                        gen_call_nonptr_or_unknown_w( pce,
4200                                                      mkexpr(dst) ) ) );
4201             break;
4202 
4203          /* Cases where it's very obvious that the result cannot be a
4204             pointer.  Hence declare directly that it's NONPTR; don't
4205             bother with the overhead of calling nonptr_or_unknown. */
4206 
4207          /* cases where it makes no sense for the result to be a ptr */
4208          /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
4209             arg, so that shift by zero preserves the original
4210             value. */
4211          case Iop_Shl32:     goto n32;
4212          case Iop_Sar32:     goto n32;
4213          case Iop_Shr32:     goto n32;
4214          case Iop_16Uto32:   goto n32;
4215          case Iop_16Sto32:   goto n32;
4216          case Iop_F64toI32S: goto n32;
4217          case Iop_16HLto32:  goto n32;
4218          case Iop_MullS16:   goto n32;
4219          case Iop_MullU16:   goto n32;
4220          case Iop_PRemC3210F64: goto n32;
4221          case Iop_DivU32:    goto n32;
4222          case Iop_DivS32:    goto n32;
4223          case Iop_V128to32:  goto n32;
4224 
4225          /* cases where result range is very limited and clearly cannot
4226             be a pointer */
4227          case Iop_1Uto32: goto n32;
4228          case Iop_1Sto32: goto n32;
4229          case Iop_8Uto32: goto n32;
4230          case Iop_8Sto32: goto n32;
4231          case Iop_Clz32:  goto n32;
4232          case Iop_Ctz32:  goto n32;
4233          case Iop_CmpF64: goto n32;
4234          case Iop_CmpORD32S: goto n32;
4235          case Iop_CmpORD32U: goto n32;
4236          n32:
4237             assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
4238             break;
4239 
4240          default:
4241             VG_(printf)("instrument_arithop(32-bit): unhandled: ");
4242             ppIROp(op);
4243             tl_assert(0);
4244       }
4245 
4246    } else {
4247 
4248       tl_assert(pce->gWordTy == Ity_I64);
4249       switch (op) {
4250 
4251          /* For these cases, pass Segs for both arguments, and the
4252             result value. */
4253          case Iop_Add64: nm = "do_addW"; fn = &do_addW; goto ssr64;
4254          case Iop_Sub64: nm = "do_subW"; fn = &do_subW; goto ssr64;
4255          case Iop_Or64:  nm = "do_orW";  fn = &do_orW;  goto ssr64;
4256          ssr64:
4257             a1v = schemeEw_Atom( pce, a1 );
4258             a2v = schemeEw_Atom( pce, a2 );
4259             res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
4260             assign( 'I', pce, dstv, mkexpr(res) );
4261             break;
4262 
4263          /* In this case, pass Segs for both arguments, the result
4264             value, and the difference between the (original) values of
4265             the arguments. */
4266          case Iop_And64:
4267             nm = "do_andW"; fn = &do_andW;
4268             a1v = schemeEw_Atom( pce, a1 );
4269             a2v = schemeEw_Atom( pce, a2 );
4270             res = gen_dirty_W_WWWW(
4271                      pce, fn, nm, a1v, a2v, mkexpr(dst),
4272                      assignNew( 'I', pce, Ity_I64,
4273                                 binop(Iop_Sub64,a1,a2) ) );
4274             assign( 'I', pce, dstv, mkexpr(res) );
4275             break;
4276 
4277          /* Pass one shadow arg and the result to the helper. */
4278          case Iop_Not64: nm = "do_notW"; fn = &do_notW; goto vr64;
4279          vr64:
4280             a1v = schemeEw_Atom( pce, a1 );
4281             res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
4282             assign( 'I', pce, dstv, mkexpr(res) );
4283             break;
4284 
4285          /* Pass two shadow args only to the helper. */
4286          case Iop_Mul64: nm = "do_mulW"; fn = &do_mulW; goto vv64;
4287          vv64:
4288             a1v = schemeEw_Atom( pce, a1 );
4289             a2v = schemeEw_Atom( pce, a2 );
4290             res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
4291             assign( 'I', pce, dstv, mkexpr(res) );
4292             break;
4293 
4294          /* We don't really know what the result could be; test at run
4295             time. */
4296          case Iop_Xor64:      goto n_or_u_64;
4297          case Iop_128HIto64:  goto n_or_u_64;
4298          case Iop_128to64:    goto n_or_u_64;
4299          case Iop_V128HIto64: goto n_or_u_64;
4300          case Iop_V128to64:   goto n_or_u_64;
4301          n_or_u_64:
4302             assign( 'I', pce, dstv,
4303                     mkexpr(
4304                        gen_call_nonptr_or_unknown_w( pce,
4305                                                      mkexpr(dst) ) ) );
4306             break;
4307 
4308          /* Cases where it's very obvious that the result cannot be a
4309             pointer.  Hence declare directly that it's NONPTR; don't
4310             bother with the overhead of calling nonptr_or_unknown. */
4311 
4312          /* cases where it makes no sense for the result to be a ptr */
4313          /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
4314             arg, so that shift by zero preserves the original
4315             value. */
4316          case Iop_Shl64:      goto n64;
4317          case Iop_Sar64:      goto n64;
4318          case Iop_Shr64:      goto n64;
4319          case Iop_32Uto64:    goto n64;
4320          case Iop_32Sto64:    goto n64;
4321          case Iop_16Uto64:    goto n64;
4322          case Iop_16Sto64:    goto n64;
4323          case Iop_32HLto64:   goto n64;
4324          case Iop_DivModU64to32: goto n64;
4325          case Iop_DivModS64to32: goto n64;
4326          case Iop_F64toI64S:     goto n64;
4327          case Iop_MullS32:    goto n64;
4328          case Iop_MullU32:    goto n64;
4329          case Iop_DivU64:     goto n64;
4330          case Iop_DivS64:     goto n64;
4331          case Iop_ReinterpF64asI64: goto n64;
4332 
4333          /* cases where result range is very limited and clearly cannot
4334             be a pointer */
4335          case Iop_1Uto64:        goto n64;
4336          case Iop_8Uto64:        goto n64;
4337          case Iop_8Sto64:        goto n64;
4338          case Iop_Ctz64:         goto n64;
4339          case Iop_Clz64:         goto n64;
4340          case Iop_CmpORD64S:     goto n64;
4341          case Iop_CmpORD64U:     goto n64;
4342          /* 64-bit simd */
4343          case Iop_Avg8Ux8: case Iop_Avg16Ux4:
4344          case Iop_Max16Sx4: case Iop_Max8Ux8: case Iop_Min16Sx4:
4345          case Iop_Min8Ux8: case Iop_MulHi16Ux4:
4346          case Iop_QNarrow32Sx2: case Iop_QNarrow16Sx4:
4347          case Iop_QNarrow16Ux4: case Iop_Add8x8: case Iop_Add32x2:
4348          case Iop_QAdd8Sx8: case Iop_QAdd16Sx4: case Iop_QAdd8Ux8:
4349          case Iop_QAdd16Ux4: case Iop_Add16x4: case Iop_CmpEQ8x8:
4350          case Iop_CmpEQ32x2: case Iop_CmpEQ16x4: case Iop_CmpGT8Sx8:
4351          case Iop_CmpGT32Sx2: case Iop_CmpGT16Sx4: case Iop_MulHi16Sx4:
4352          case Iop_Mul16x4: case Iop_ShlN32x2: case Iop_ShlN16x4:
4353          case Iop_SarN32x2: case Iop_SarN16x4: case Iop_ShrN32x2: case Iop_ShrN8x8:
4354          case Iop_ShrN16x4: case Iop_Sub8x8: case Iop_Sub32x2:
4355          case Iop_QSub8Sx8: case Iop_QSub16Sx4: case Iop_QSub8Ux8:
4356          case Iop_QSub16Ux4: case Iop_Sub16x4: case Iop_InterleaveHI8x8:
4357          case Iop_InterleaveHI32x2: case Iop_InterleaveHI16x4:
4358          case Iop_InterleaveLO8x8: case Iop_InterleaveLO32x2:
4359          case Iop_InterleaveLO16x4: case Iop_SarN8x8:
4360          case Iop_Perm8x8: case Iop_ShlN8x8: case Iop_Mul32x2:
4361          case Iop_CatEvenLanes16x4: case Iop_CatOddLanes16x4:
4362          n64:
4363             assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
4364             break;
4365 
4366          default:
4367             VG_(printf)("instrument_arithop(64-bit): unhandled: ");
4368             ppIROp(op);
4369             tl_assert(0);
4370       }
4371    }
4372 }
4373 
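/* Call nonptr_or_unknown_range(addr, len), optionally under a guard.
   A NULL guard means the call is unconditional; a non-NULL guard (as
   used by the CAS cases in schemeS below) makes the helper call
   conditional on it. */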
4374 static
4375 void gen_call_nonptr_or_unknown_range ( PCEnv* pce,
4376                                         IRExpr* guard,
4377                                         IRAtom* addr, IRAtom* len )
4378 {
4379    gen_dirty_v_WW( pce, guard,
4380                    &nonptr_or_unknown_range,
4381                    "nonptr_or_unknown_range",
4382                    addr, len );
4383 }
4384 
4385 /* iii describes zero or more non-exact integer register updates.  For
4386    each one, generate IR to get the containing register, apply
4387    nonptr_or_unknown to it, and write it back again. */
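/* In outline, for each tracked offset 'o' the generated IR is

      t1 = GET:gWordTy(o)
      t2 = nonptr_or_unknown(t1)            -- dirty helper call
      PUT(o + guest_state_sizeB) = t2

   The PUT lands in the shadow copy of the guest state, which sits
   'guest_state_sizeB' bytes above the real one. */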
4388 static void gen_nonptr_or_unknown_for_III( PCEnv* pce, IntRegInfo* iii )
4389 {
4390    Int i;
4391    tl_assert(iii && iii->n_offsets >= 0);
4392    for (i = 0; i < iii->n_offsets; i++) {
4393       IRAtom* a1 = assignNew( 'I', pce, pce->gWordTy,
4394                               IRExpr_Get( iii->offsets[i], pce->gWordTy ));
4395       IRTemp a2 = gen_call_nonptr_or_unknown_w( pce, a1 );
4396       stmt( 'I', pce, IRStmt_Put( iii->offsets[i]
4397                                      + pce->guest_state_sizeB,
4398                                   mkexpr(a2) ));
4399    }
4400 }
4401 
4402 
4403 /* schemeS helper for doing stores, pulled out into a function because
4404    it needs to handle both normal stores and store-conditionals.
4405    Returns False if we see a case we don't know how to handle.
4406 */
4407 static Bool schemeS_store ( PCEnv* pce,
4408                             IRExpr* data, IRExpr* addr, IRTemp resSC )
4409 {
4410    /* We have: STle(addr) = data.
4411       If data is int-word sized, generate
4412          check_store4(addr, addr#, data, data#)
4413       and for all other stores generate the relevant one of
4414          check_store{1,2,...}(addr, addr#, data)
4415 
4416       The helper actually *does* the store, so that it can do the
4417       post-hoc ugly hack of inspecting and "improving" the shadow data
4418       after the store, in the case where it isn't an aligned word
4419       store.
4420 
4421       Only word-sized values are shadowed.  If this is a
4422       store-conditional, .resSC will denote a non-word-typed temp, and
4423       so we don't need to shadow it.  Assert about the type, though.
4424       However, since we're not re-emitting the original IRStmt_Store,
4425       but rather doing it as part of the helper function, we need to
4426       actually do a SC in the helper, and assign the result bit to
4427       .resSC.  Ugly.
4428    */
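   /* Worked example (32-bit guest): a plain "STle(a) = d" with
      d :: Ity_I32 becomes the single call
         check_store4_P(a, a#, d, d#)
      where a# and d# are the shadow words of a and d.  Since the
      helper both checks and performs the store, the original
      IRStmt_Store is never copied through. */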
4429    IRType  d_ty  = typeOfIRExpr(pce->sb->tyenv, data);
4430    IRExpr* addrv = schemeEw_Atom( pce, addr );
4431    if (resSC != IRTemp_INVALID) {
4432       tl_assert(typeOfIRTemp(pce->sb->tyenv, resSC) == Ity_I1);
4433       /* viz, not something we want to shadow */
4434       /* also, throw out all store-conditional cases that
4435          we can't handle */
4436       if (pce->gWordTy == Ity_I32 && d_ty != Ity_I32)
4437          return False;
4438       if (pce->gWordTy == Ity_I64 && d_ty != Ity_I32 && d_ty != Ity_I64)
4439          return False;
4440    }
4441    if (pce->gWordTy == Ity_I32) {
4442       /* ------ 32 bit host/guest (cough, cough) ------ */
4443       switch (d_ty) {
4444          /* Integer word case */
4445          case Ity_I32: {
4446             IRExpr* datav = schemeEw_Atom( pce, data );
4447             if (resSC == IRTemp_INVALID) {
4448                /* "normal" store */
4449                gen_dirty_v_WWWW( pce,
4450                                  &check_store4_P, "check_store4_P",
4451                                  addr, addrv, data, datav );
4452             } else {
4453                /* store-conditional; need to snarf the success bit */
4454                IRTemp resSC32
4455                    = gen_dirty_W_WWWW( pce,
4456                                        &check_store4C_P,
4457                                        "check_store4C_P",
4458                                        addr, addrv, data, datav );
4459                /* presumably resSC32 will really be Ity_I32.  In
4460                   any case we'll get jumped by the IR sanity
4461                   checker if it's not, when it sees the
4462                   following statement. */
4463                assign( 'I', pce, resSC, unop(Iop_32to1, mkexpr(resSC32)) );
4464             }
4465             break;
4466          }
4467          /* Integer subword cases */
4468          case Ity_I16:
4469             gen_dirty_v_WWW( pce,
4470                              &check_store2, "check_store2",
4471                              addr, addrv,
4472                              uwiden_to_host_word( pce, data ));
4473             break;
4474          case Ity_I8:
4475             gen_dirty_v_WWW( pce,
4476                              &check_store1, "check_store1",
4477                              addr, addrv,
4478                              uwiden_to_host_word( pce, data ));
4479             break;
4480          /* 64-bit float.  Pass store data in 2 32-bit pieces. */
4481          case Ity_F64: {
4482             IRAtom* d64 = assignNew( 'I', pce, Ity_I64,
4483                                      unop(Iop_ReinterpF64asI64, data) );
4484             IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
4485                                        unop(Iop_64to32, d64) );
4486             IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
4487                                        unop(Iop_64HIto32, d64) );
4488             gen_dirty_v_WWWW( pce,
4489                               &check_store8_ms4B_ls4B,
4490                               "check_store8_ms4B_ls4B",
4491                               addr, addrv, dHi32, dLo32 );
4492             break;
4493          }
4494          /* 32-bit float.  We can just use _store4, but need
4495             to futz with the argument type. */
4496          case Ity_F32: {
4497             IRAtom* i32 = assignNew( 'I', pce, Ity_I32,
4498                                      unop(Iop_ReinterpF32asI32,
4499                                           data ) );
4500             gen_dirty_v_WWW( pce,
4501                              &check_store4,
4502                              "check_store4",
4503                              addr, addrv, i32 );
4504             break;
4505          }
4506          /* 64-bit int.  Pass store data in 2 32-bit pieces. */
4507          case Ity_I64: {
4508             IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
4509                                        unop(Iop_64to32, data) );
4510             IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
4511                                        unop(Iop_64HIto32, data) );
4512             gen_dirty_v_WWWW( pce,
4513                               &check_store8_ms4B_ls4B,
4514                               "check_store8_ms4B_ls4B",
4515                               addr, addrv, dHi32, dLo32 );
4516             break;
4517          }
4518          /* 128-bit vector.  Pass store data in 4 32-bit pieces.
4519             This is all very ugly and inefficient, but it is
4520             hard to better without considerably complicating the
4521             store-handling schemes. */
4522          case Ity_V128: {
4523             IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
4524                                        unop(Iop_V128HIto64, data) );
4525             IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
4526                                        unop(Iop_V128to64, data) );
4527             IRAtom* w3    = assignNew( 'I', pce, Ity_I32,
4528                                        unop(Iop_64HIto32, dHi64) );
4529             IRAtom* w2    = assignNew( 'I', pce, Ity_I32,
4530                                        unop(Iop_64to32, dHi64) );
4531             IRAtom* w1    = assignNew( 'I', pce, Ity_I32,
4532                                        unop(Iop_64HIto32, dLo64) );
4533             IRAtom* w0    = assignNew( 'I', pce, Ity_I32,
4534                                        unop(Iop_64to32, dLo64) );
4535             gen_dirty_v_6W( pce,
4536                             &check_store16_ms4B_4B_4B_ls4B,
4537                             "check_store16_ms4B_4B_4B_ls4B",
4538                             addr, addrv, w3, w2, w1, w0 );
4539             break;
4540          }
4541          default:
4542             ppIRType(d_ty); tl_assert(0);
4543       }
4544    } else {
4545       /* ------ 64 bit host/guest (cough, cough) ------ */
4546       switch (d_ty) {
4547          /* Integer word case */
4548          case Ity_I64: {
4549             IRExpr* datav = schemeEw_Atom( pce, data );
4550             if (resSC == IRTemp_INVALID) {
4551                /* "normal" store */
4552                gen_dirty_v_WWWW( pce,
4553                                  &check_store8_P, "check_store8_P",
4554                                  addr, addrv, data, datav );
4555             } else {
4556                IRTemp resSC64
4557                    = gen_dirty_W_WWWW( pce,
4558                                        &check_store8C_P,
4559                                        "check_store8C_P",
4560                                        addr, addrv, data, datav );
4561                assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
4562             }
4563             break;
4564          }
4565          /* Integer subword cases */
4566          case Ity_I32:
4567             if (resSC == IRTemp_INVALID) {
4568                /* "normal" store */
4569                gen_dirty_v_WWW( pce,
4570                                 &check_store4, "check_store4",
4571                                 addr, addrv,
4572                                 uwiden_to_host_word( pce, data ));
4573             } else {
4574                /* store-conditional; need to snarf the success bit */
4575                IRTemp resSC64
4576                    = gen_dirty_W_WWW( pce,
4577                                       &check_store4C,
4578                                       "check_store4C",
4579                                       addr, addrv,
4580                                       uwiden_to_host_word( pce, data ));
4581                assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
4582             }
4583             break;
4584          case Ity_I16:
4585             gen_dirty_v_WWW( pce,
4586                              &check_store2, "check_store2",
4587                              addr, addrv,
4588                              uwiden_to_host_word( pce, data ));
4589             break;
4590          case Ity_I8:
4591             gen_dirty_v_WWW( pce,
4592                              &check_store1, "check_store1",
4593                              addr, addrv,
4594                              uwiden_to_host_word( pce, data ));
4595             break;
4596          /* 128-bit vector.  Pass store data in 2 64-bit pieces. */
4597          case Ity_V128: {
4598             IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
4599                                        unop(Iop_V128HIto64, data) );
4600             IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
4601                                        unop(Iop_V128to64, data) );
4602             gen_dirty_v_WWWW( pce,
4603                               &check_store16_ms8B_ls8B,
4604                               "check_store16_ms8B_ls8B",
4605                               addr, addrv, dHi64, dLo64 );
4606             break;
4607          }
4608          /* 64-bit float. */
4609          case Ity_F64: {
4610             IRAtom* dI = assignNew( 'I', pce, Ity_I64,
4611                                      unop(Iop_ReinterpF64asI64,
4612                                           data ) );
4613             gen_dirty_v_WWW( pce,
4614                              &check_store8_all8B,
4615                              "check_store8_all8B",
4616                              addr, addrv, dI );
4617             break;
4618          }
4619          /* 32-bit float.  We can just use _store4, but need
4620             to futz with the argument type. */
4621          case Ity_F32: {
4622             IRAtom* i32 = assignNew( 'I', pce, Ity_I32,
4623                                      unop(Iop_ReinterpF32asI32,
4624                                           data ) );
4625             IRAtom* i64 = assignNew( 'I', pce, Ity_I64,
4626                                      unop(Iop_32Uto64,
4627                                           i32 ) );
4628             gen_dirty_v_WWW( pce,
4629                              &check_store4,
4630                              "check_store4",
4631                              addr, addrv, i64 );
4632             break;
4633          }
4634          default:
4635             ppIRType(d_ty); tl_assert(0);
4636       }
4637    }
4638    /* And don't copy the original, since the helper does the store.
4639       Ick. */
4640    return True; /* store was successfully instrumented */
4641 }
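/* (schemeS_store has two callers: Ist_Store passes
   resSC == IRTemp_INVALID, and the store-conditional half of Ist_LLSC
   passes the SC's result temp.) */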
4642 
4643 
4644 /* schemeS helper for doing loads, pulled out into a function because
4645    it needs to handle both normal loads and load-linked's.
4646 */
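/* In outline: word-sized loads go through check_load{4,8}_P, which
   return the shadow value to assign to 'dstv'; all other sizes go
   through the plain check_load* helpers, which only check the access,
   so 'dstv' must then be IRTemp_INVALID. */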
4647 static void schemeS_load ( PCEnv* pce, IRExpr* addr, IRType e_ty, IRTemp dstv )
4648 {
4649    HChar*  h_nm  = NULL;
4650    void*   h_fn  = NULL;
4651    IRExpr* addrv = NULL;
4652    if (e_ty == pce->gWordTy) {
4653       tl_assert(dstv != IRTemp_INVALID);
4654    } else {
4655       tl_assert(dstv == IRTemp_INVALID);
4656    }
4657    if (pce->gWordTy == Ity_I32) {
4658       /* 32 bit host/guest (cough, cough) */
4659       switch (e_ty) {
4660          /* Ity_I32: helper returns shadow value. */
4661          case Ity_I32:  h_fn = &check_load4_P;
4662                         h_nm = "check_load4_P"; break;
4663          /* all others: helper does not return a shadow
4664             value. */
4665          case Ity_V128: h_fn = &check_load16;
4666                         h_nm = "check_load16"; break;
4667          case Ity_I64:
4668          case Ity_F64:  h_fn = &check_load8;
4669                         h_nm = "check_load8"; break;
4670          case Ity_F32:  h_fn = &check_load4;
4671                         h_nm = "check_load4"; break;
4672          case Ity_I16:  h_fn = &check_load2;
4673                         h_nm = "check_load2"; break;
4674          case Ity_I8:   h_fn = &check_load1;
4675                         h_nm = "check_load1"; break;
4676          default: ppIRType(e_ty); tl_assert(0);
4677       }
4678       addrv = schemeEw_Atom( pce, addr );
4679       if (e_ty == Ity_I32) {
4680          assign( 'I', pce, dstv,
4681                   mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
4682                                                addr, addrv )) );
4683       } else {
4684          gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrv );
4685       }
4686    } else {
4687       /* 64 bit host/guest (cough, cough) */
4688       switch (e_ty) {
4689          /* Ity_I64: helper returns shadow value. */
4690          case Ity_I64:  h_fn = &check_load8_P;
4691                         h_nm = "check_load8_P"; break;
4692          /* all others: helper does not return a shadow
4693             value. */
4694          case Ity_V128: h_fn = &check_load16;
4695                         h_nm = "check_load16"; break;
4696          case Ity_F64:  h_fn = &check_load8;
4697                         h_nm = "check_load8"; break;
4698          case Ity_F32:
4699          case Ity_I32:  h_fn = &check_load4;
4700                         h_nm = "check_load4"; break;
4701          case Ity_I16:  h_fn = &check_load2;
4702                         h_nm = "check_load2"; break;
4703          case Ity_I8:   h_fn = &check_load1;
4704                         h_nm = "check_load1"; break;
4705          default: ppIRType(e_ty); tl_assert(0);
4706       }
4707       addrv = schemeEw_Atom( pce, addr );
4708       if (e_ty == Ity_I64) {
4709          assign( 'I', pce, dstv,
4710                   mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
4711                                                addr, addrv )) );
4712       } else {
4713          gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrv );
4714       }
4715    }
4716 }
4717 
4718 
4719 /* Generate into 'pce' the instrumentation for 'st'.  Also copy 'st'
4720    itself into 'pce' (the caller does not do so).  This is somewhat
4721    complex and relies heavily on the assumption that the incoming IR
4722    is in flat form.
4723 
4724    Generally speaking, the instrumentation is placed after the
4725    original statement, so that results computed by the original can be
4726    used in the instrumentation.  However, that isn't safe for memory
4727    references, since we need the instrumentation (hence bounds check
4728    and potential error message) to happen before the reference itself,
4729    as the latter could cause a fault. */
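/* Concretely: for "t = LDle:I32(a)" the check_load* call is emitted
   before the load itself is copied through, since the load might
   fault; whereas for "t = Add32(x,y)" the original is copied first
   and the shadow computation follows, since it consumes the
   result. */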
4730 static void schemeS ( PCEnv* pce, IRStmt* st )
4731 {
4732    tl_assert(st);
4733    tl_assert(isFlatIRStmt(st));
4734 
4735    switch (st->tag) {
4736 
4737       case Ist_CAS: {
4738          /* In all these CAS cases, the did-we-succeed? comparison is
4739             done using Iop_CasCmpEQ{8,16,32,64} rather than the plain
4740             Iop_CmpEQ equivalents.  This isn't actually necessary,
4741             since the generated IR is not going to be subsequently
4742             instrumented by Memcheck.  But it's done for consistency.
4743             See COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
4744             background/rationale. */
4745          IRCAS* cas = st->Ist.CAS.details;
4746          IRType elTy = typeOfIRExpr(pce->sb->tyenv, cas->expdLo);
4747          if (cas->oldHi == IRTemp_INVALID) {
4748             /* ------------ SINGLE CAS ------------ */
4749             /* -- single cas -- 32 bits, on 32-bit host -- */
4750             /* -- single cas -- 64 bits, on 64-bit host -- */
4751             /* -- viz, single cas, native-word case -- */
4752             if ( (pce->gWordTy == Ity_I32 && elTy == Ity_I32)
4753                  || (pce->gWordTy == Ity_I64 && elTy == Ity_I64) ) {
4754                // 32 bit host translation scheme; 64-bit is analogous
4755                // old#    = check_load4_P(addr, addr#)
4756                // old     = CAS(addr:expd->new) [COPY]
4757                // success = CasCmpEQ32(old,expd)
4758                // if (success) do_shadow_store4_P(addr, new#)
4759                IRTemp  success;
4760                Bool    is64  = elTy == Ity_I64;
4761                IROp    cmpEQ = is64 ? Iop_CasCmpEQ64 : Iop_CasCmpEQ32;
4762                void*   r_fn  = is64 ? &check_load8_P  : &check_load4_P;
4763                HChar*  r_nm  = is64 ? "check_load8_P" : "check_load4_P";
4764                void*   w_fn  = is64 ? &do_shadow_store8_P  : &do_shadow_store4_P;
4765                HChar*  w_nm  = is64 ? "do_shadow_store8_P" : "do_shadow_store4_P";
4766                IRExpr* addr  = cas->addr;
4767                IRExpr* addrV = schemeEw_Atom(pce, addr);
4768                IRTemp  old   = cas->oldLo;
4769                IRTemp  oldV  = newShadowTmp(pce, old);
4770                IRExpr* nyu   = cas->dataLo;
4771                IRExpr* nyuV  = schemeEw_Atom(pce, nyu);
4772                IRExpr* expd  = cas->expdLo;
4773                assign( 'I', pce, oldV,
4774                        mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm, addr, addrV )));
4775                stmt( 'C', pce, st );
4776                success = newTemp(pce, Ity_I1, NonShad);
4777                assign('I', pce, success, binop(cmpEQ, mkexpr(old), expd));
4778                gen_dirty_v_WW( pce, mkexpr(success), w_fn, w_nm, addr, nyuV );
4779             }
4780             else
4781             /* -- single cas -- 8 or 16 bits, on 32-bit host -- */
4782             /* -- viz, single cas, 32-bit subword cases -- */
4783             if (pce->gWordTy == Ity_I32
4784                 && (elTy == Ity_I8 || elTy == Ity_I16)) {
4785                // 8-bit translation scheme; 16-bit is analogous
4786                // check_load1(addr, addr#)
4787                // old     = CAS(addr:expd->new) [COPY]
4788                // success = CasCmpEQ8(old,expd)
4789                // if (success) nonptr_or_unknown_range(addr, 1)
4790                IRTemp  success;
4791                Bool    is16  = elTy == Ity_I16;
4792                IRExpr* addr  = cas->addr;
4793                IRExpr* addrV = schemeEw_Atom(pce, addr);
4794                IRTemp  old   = cas->oldLo;
4795                IRExpr* expd  = cas->expdLo;
4796                void*   h_fn  = is16 ? &check_load2  : &check_load1;
4797                HChar*  h_nm  = is16 ? "check_load2" : "check_load1";
4798                IROp    cmpEQ = is16 ? Iop_CasCmpEQ16 : Iop_CasCmpEQ8;
4799                Int     szB   = is16 ? 2 : 1;
4800                gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrV );
4801                stmt( 'C', pce, st );
4802                success = newTemp(pce, Ity_I1, NonShad);
4803                assign('I', pce, success,
4804                            binop(cmpEQ, mkexpr(old), expd));
4805                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
4806                                                  addr, mkIRExpr_HWord(szB) );
4807             }
4808             else
4809             /* -- single cas -- 8, 16 or 32 bits, on 64-bit host -- */
4810             /* -- viz, single cas, 64-bit subword cases -- */
4811             if (pce->gWordTy == Ity_I64
4812                 && (elTy == Ity_I8 || elTy == Ity_I16 || elTy == Ity_I32)) {
4813                // 8-bit translation scheme; 16/32-bit are analogous
4814                // check_load1(addr, addr#)
4815                // old     = CAS(addr:expd->new) [COPY]
4816                // success = CasCmpEQ8(old,expd)
4817                // if (success) nonptr_or_unknown_range(addr, 1)
4818                IRTemp  success;
4819                Bool    is16  = elTy == Ity_I16;
4820                Bool    is32  = elTy == Ity_I32;
4821                IRExpr* addr  = cas->addr;
4822                IRExpr* addrV = schemeEw_Atom(pce, addr);
4823                IRTemp  old   = cas->oldLo;
4824                IRExpr* expd  = cas->expdLo;
4825                void*   h_fn  = is32 ? &check_load4
4826                                     : (is16 ? &check_load2 : &check_load1);
4827                HChar*  h_nm  = is32 ? "check_load4"
4828                                     : (is16 ? "check_load2" : "check_load1");
4829                IROp    cmpEQ = is32 ? Iop_CasCmpEQ32
4830                                     : (is16 ? Iop_CasCmpEQ16 : Iop_CasCmpEQ8);
4831                Int     szB   = is32 ? 4 : (is16 ? 2 : 1);
4832                gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrV );
4833                stmt( 'C', pce, st );
4834                success = newTemp(pce, Ity_I1, NonShad);
4835                assign('I', pce, success,
4836                            binop(cmpEQ, mkexpr(old), expd));
4837                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
4838                                                  addr, mkIRExpr_HWord(szB) );
4839             }
4840             else
4841                goto unhandled;
4842          } else {
4843             /* ------------ DOUBLE CAS ------------ */
4844             /* Punt on bigendian DCAS.  In fact it's probably trivial
4845                to do; just swap the individual shadow loads/stores
4846                around in memory, but we'd have to verify it, and there
4847                is no use case.  So punt. */
4848             if (cas->end != Iend_LE)
4849                goto unhandled;
4850             /* -- double cas -- 2 x 32 bits, on 32-bit host -- */
4851             /* -- double cas -- 2 x 64 bits, on 64-bit host -- */
4852             /* -- viz, double cas, native-word case -- */
4853             if ( (pce->gWordTy == Ity_I32 && elTy == Ity_I32)
4854                  || (pce->gWordTy == Ity_I64 && elTy == Ity_I64) ) {
4855                // 32 bit host translation scheme; 64-bit is analogous
4856                // oldHi#    = check_load4_P(addr+4, addr#)
4857                // oldLo#    = check_load4_P(addr+0, addr#)
4858                // oldHi/Lo  = DCAS(addr:expdHi/Lo->newHi/Lo) [COPY]
4859                // success   = CasCmpEQ32(oldHi,expdHi) && CasCmpEQ32(oldLo,expdLo)
4860                //           = ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0
4861                // if (success) do_shadow_store4_P(addr+4, newHi#)
4862                // if (success) do_shadow_store4_P(addr+0, newLo#)
4863                IRTemp  diffHi, diffLo, diff, success, addrpp;
4864                Bool    is64       = elTy == Ity_I64;
4865                void*   r_fn       = is64 ? &check_load8_P  : &check_load4_P;
4866                HChar*  r_nm       = is64 ? "check_load8_P" : "check_load4_P";
4867                void*   w_fn       = is64 ? &do_shadow_store8_P
4868                                          : &do_shadow_store4_P;
4869                HChar*  w_nm       = is64 ? "do_shadow_store8_P"
4870                                          : "do_shadow_store4_P";
4871                IROp    opADD      = is64 ? Iop_Add64 : Iop_Add32;
4872                IROp    opXOR      = is64 ? Iop_Xor64 : Iop_Xor32;
4873                IROp    opOR       = is64 ? Iop_Or64 : Iop_Or32;
4874                IROp    opCasCmpEQ = is64 ? Iop_CasCmpEQ64 : Iop_CasCmpEQ32;
4875                IRExpr* step       = is64 ? mkU64(8) : mkU32(4);
4876                IRExpr* zero       = is64 ? mkU64(0) : mkU32(0);
4877                IRExpr* addr       = cas->addr;
4878                IRExpr* addrV      = schemeEw_Atom(pce, addr);
4879                IRTemp  oldLo      = cas->oldLo;
4880                IRTemp  oldLoV     = newShadowTmp(pce, oldLo);
4881                IRTemp  oldHi      = cas->oldHi;
4882                IRTemp  oldHiV     = newShadowTmp(pce, oldHi);
4883                IRExpr* nyuLo      = cas->dataLo;
4884                IRExpr* nyuLoV     = schemeEw_Atom(pce, nyuLo);
4885                IRExpr* nyuHi      = cas->dataHi;
4886                IRExpr* nyuHiV     = schemeEw_Atom(pce, nyuHi);
4887                IRExpr* expdLo     = cas->expdLo;
4888                IRExpr* expdHi     = cas->expdHi;
4889                tl_assert(elTy == Ity_I32 || elTy == Ity_I64);
4890                tl_assert(pce->gWordTy == elTy);
4891                addrpp = newTemp(pce, elTy, NonShad);
4892                assign('I', pce, addrpp, binop(opADD, addr, step));
4893                assign('I', pce, oldHiV,
4894                       mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm,
4895                                                    mkexpr(addrpp), addrV ))
4896                );
4897                assign('I', pce, oldLoV,
4898                       mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm,
4899                                                    addr, addrV ))
4900                );
4901                stmt( 'C', pce, st );
4902                diffHi = newTemp(pce, elTy, NonShad);
4903                assign('I', pce, diffHi,
4904                            binop(opXOR, mkexpr(oldHi), expdHi));
4905                diffLo = newTemp(pce, elTy, NonShad);
4906                assign('I', pce, diffLo,
4907                            binop(opXOR, mkexpr(oldLo), expdLo));
4908                diff = newTemp(pce, elTy, NonShad);
4909                assign('I', pce, diff,
4910                       binop(opOR, mkexpr(diffHi), mkexpr(diffLo)));
4911                success = newTemp(pce, Ity_I1, NonShad);
4912                assign('I', pce, success,
4913                       binop(opCasCmpEQ, mkexpr(diff), zero));
4914                gen_dirty_v_WW( pce, mkexpr(success),
4915                                      w_fn, w_nm, mkexpr(addrpp), nyuHiV );
4916                gen_dirty_v_WW( pce, mkexpr(success),
4917                                     w_fn, w_nm, addr, nyuLoV );
4918             }
4919             else
4920             /* -- double cas -- 2 x 32 bits, on 64-bit host -- */
4921             if (pce->gWordTy == Ity_I64 && elTy == Ity_I32) {
4922                // check_load8(addr, addr#)
4923                // oldHi/Lo  = DCAS(addr:expdHi/Lo->newHi/Lo) [COPY]
4924                // success   = CasCmpEQ32(oldHi,expdHi) && CasCmpEQ32(oldLo,expdLo)
4925                //           = ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0
4926                // if (success) nonptr_or_unknown_range(addr, 8)
4927                IRTemp  diffHi, diffLo, diff, success;
4928                IRExpr* addr   = cas->addr;
4929                IRExpr* addrV  = schemeEw_Atom(pce, addr);
4930                IRTemp  oldLo  = cas->oldLo;
4931                IRTemp  oldHi  = cas->oldHi;
4932                IRExpr* expdLo = cas->expdLo;
4933                IRExpr* expdHi = cas->expdHi;
4934                gen_dirty_v_WW( pce, NULL, &check_load8, "check_load8",
4935                                addr, addrV );
4936                stmt( 'C', pce, st );
4937                diffHi = newTemp(pce, Ity_I32, NonShad);
4938                assign('I', pce, diffHi,
4939                            binop(Iop_Xor32, mkexpr(oldHi), expdHi));
4940                diffLo = newTemp(pce, Ity_I32, NonShad);
4941                assign('I', pce, diffLo,
4942                            binop(Iop_Xor32, mkexpr(oldLo), expdLo));
4943                diff = newTemp(pce, Ity_I32, NonShad);
4944                assign('I', pce, diff,
4945                       binop(Iop_Or32, mkexpr(diffHi), mkexpr(diffLo)));
4946                success = newTemp(pce, Ity_I1, NonShad);
4947                assign('I', pce, success,
4948                       binop(Iop_CasCmpEQ32, mkexpr(diff), mkU32(0)));
4949                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
4950                                                  addr, mkU64(8) );
4951             }
4952             else
4953                goto unhandled;
4954          }
4955          break;
4956       }
4957 
4958       case Ist_LLSC: {
4959          if (st->Ist.LLSC.storedata == NULL) {
4960             /* LL */
4961             IRTemp dst    = st->Ist.LLSC.result;
4962             IRType dataTy = typeOfIRTemp(pce->sb->tyenv, dst);
4963             Bool   isWord = dataTy == pce->gWordTy;
4964             IRTemp dstv   = isWord ? newShadowTmp( pce, dst )
4965                                    : IRTemp_INVALID;
4966             schemeS_load( pce, st->Ist.LLSC.addr, dataTy, dstv );
4967             /* copy the original -- must happen after the helper call */
4968             stmt( 'C', pce, st );
4969          } else {
4970             /* SC */
4971             schemeS_store( pce,
4972                            st->Ist.LLSC.storedata,
4973                            st->Ist.LLSC.addr,
4974                            st->Ist.LLSC.result );
4975             /* Don't copy the original, since the helper does the
4976                store itself. */
4977          }
4978          break;
4979       }
4980 
4981       case Ist_Dirty: {
4982          Int i;
4983          IRDirty* di;
4984          stmt( 'C', pce, st );
4985          /* nasty.  assumes that (1) all helpers are unconditional,
4986             and (2) all outputs are non-ptr */
4987          di = st->Ist.Dirty.details;
4988          /* deal with the return tmp, if any */
4989          if (di->tmp != IRTemp_INVALID
4990              && typeOfIRTemp(pce->sb->tyenv, di->tmp) == pce->gWordTy) {
4991             /* di->tmp is shadowed.  Set it to NONPTR. */
4992             IRTemp dstv = newShadowTmp( pce, di->tmp );
4993             if (pce->gWordTy == Ity_I32) {
4994                assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
4995             } else {
4996                assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
4997             }
4998          }
4999          /* apply the nonptr_or_unknown technique to any parts of
5000             the guest state that happen to get written */
5001          for (i = 0; i < di->nFxState; i++) {
5002             IntRegInfo iii;
5003             tl_assert(di->fxState[i].fx != Ifx_None);
5004             if (di->fxState[i].fx == Ifx_Read)
5005                continue; /* this bit is only read -- not interesting */
5006             get_IntRegInfo( &iii, di->fxState[i].offset,
5007                                   di->fxState[i].size );
5008             tl_assert(iii.n_offsets >= -1
5009                       && iii.n_offsets <= N_INTREGINFO_OFFSETS);
5010             /* Deal with 3 possible cases, same as with Ist_Put
5011                elsewhere in this function. */
5012             if (iii.n_offsets == -1) {
5013                /* case (1): exact write of an integer register. */
5014                IRAtom* a1
5015                   = assignNew( 'I', pce, pce->gWordTy,
5016                                IRExpr_Get( di->fxState[i].offset, pce->gWordTy ));
5017                IRTemp a2 = gen_call_nonptr_or_unknown_w( pce, a1 );
5018                stmt( 'I', pce, IRStmt_Put( di->fxState[i].offset
5019                                               + pce->guest_state_sizeB,
5020                                            mkexpr(a2) ));
5021             } else {
5022                /* when == 0: case (3): no instrumentation needed */
5023                /* when > 0: case (2) .. complex case.  Fish out the
5024                   stored value for the whole register, heave it
5025                   through nonptr_or_unknown, and use that as the new
5026                   shadow value. */
5027                tl_assert(iii.n_offsets >= 0
5028                          && iii.n_offsets <= N_INTREGINFO_OFFSETS);
5029                gen_nonptr_or_unknown_for_III( pce, &iii );
5030             }
5031          } /* for (i = 0; i < di->nFxState; i++) */
5032          /* finally, deal with memory outputs */
5033          if (di->mFx != Ifx_None) {
5034             tl_assert(di->mAddr && isIRAtom(di->mAddr));
5035             tl_assert(di->mSize > 0);
5036             gen_call_nonptr_or_unknown_range( pce, NULL, di->mAddr,
5037                                               mkIRExpr_HWord(di->mSize));
5038          }
5039          break;
5040       }
5041 
5042       case Ist_NoOp:
5043          break;
5044 
5045       /* nothing interesting in these; just copy them through */
5046       case Ist_AbiHint:
5047       case Ist_MBE:
5048       case Ist_Exit:
5049       case Ist_IMark:
5050          stmt( 'C', pce, st );
5051          break;
5052 
5053       case Ist_PutI: {
5054          IRRegArray* descr = st->Ist.PutI.descr;
5055          stmt( 'C', pce, st );
5056          tl_assert(descr && descr->elemTy);
5057          if (is_integer_guest_reg_array(descr)) {
5058             /* if this fails, is_integer_guest_reg_array is returning
5059                bogus results */
5060             tl_assert(descr->elemTy == pce->gWordTy);
5061             stmt(
5062                'I', pce,
5063                IRStmt_PutI(
5064                   mkIRRegArray(descr->base + pce->guest_state_sizeB,
5065                                descr->elemTy, descr->nElems),
5066                   st->Ist.PutI.ix,
5067                   st->Ist.PutI.bias,
5068                   schemeEw_Atom( pce, st->Ist.PutI.data)
5069                )
5070             );
5071          }
5072          break;
5073       }
5074 
5075       case Ist_Put: {
5076          /* PUT(offset) = atom */
5077          /* 3 cases:
5078             1. It's a complete write of an integer register.  Get hold of
5079                'atom's shadow value and write it in the shadow state.
5080             2. It's a partial write of an integer register.  Let the write
5081                happen, then fish out the complete register value and see if,
5082                via range checking, consultation of tea leaves, etc, its
5083                shadow value can be upgraded to anything useful.
5084             3. It is none of the above.  Generate no instrumentation. */
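         /* (For instance: a word-sized write covering a whole integer
            register slot is case 1; a sub-word write into such a slot
            is case 2; a write to a non-integer slot, e.g. a
            floating-point register, is case 3.) */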
5085          IntRegInfo iii;
5086          IRType     ty;
5087          stmt( 'C', pce, st );
5088          ty = typeOfIRExpr(pce->sb->tyenv, st->Ist.Put.data);
5089          get_IntRegInfo( &iii, st->Ist.Put.offset,
5090                          sizeofIRType(ty) );
5091          if (iii.n_offsets == -1) {
5092             /* case (1): exact write of an integer register. */
5093             tl_assert(ty == pce->gWordTy);
5094             stmt( 'I', pce,
5095                        IRStmt_Put( st->Ist.Put.offset
5096                                       + pce->guest_state_sizeB,
5097                                    schemeEw_Atom( pce, st->Ist.Put.data)) );
5098          } else {
5099             /* when == 0: case (3): no instrumentation needed */
5100             /* when > 0: case (2) .. complex case.  Fish out the
5101                stored value for the whole register, heave it through
5102                nonptr_or_unknown, and use that as the new shadow
5103                value. */
5104             tl_assert(iii.n_offsets >= 0
5105                       && iii.n_offsets <= N_INTREGINFO_OFFSETS);
5106             gen_nonptr_or_unknown_for_III( pce, &iii );
5107          }
5108          break;
5109       } /* case Ist_Put */
5110 
5111       case Ist_Store: {
5112          Bool ok = schemeS_store( pce,
5113                                   st->Ist.Store.data,
5114                                   st->Ist.Store.addr,
5115                                   IRTemp_INVALID/*not a SC*/ );
5116          if (!ok) goto unhandled;
5117          /* Don't copy the original, since the helper does the store
5118             itself. */
5119          break;
5120       }
5121 
5122       case Ist_WrTmp: {
5123          /* This is the only place we have to deal with the full
5124             IRExpr range.  In all other places where an IRExpr could
5125             appear, we in fact only get an atom (Iex_RdTmp or
5126             Iex_Const). */
5127          IRExpr* e      = st->Ist.WrTmp.data;
5128          IRType  e_ty   = typeOfIRExpr( pce->sb->tyenv, e );
5129          Bool    isWord = e_ty == pce->gWordTy;
5130          IRTemp  dst    = st->Ist.WrTmp.tmp;
5131          IRTemp  dstv   = isWord ? newShadowTmp( pce, dst )
5132                                  : IRTemp_INVALID;
5133 
5134          switch (e->tag) {
5135 
5136             case Iex_Const: {
5137                stmt( 'C', pce, st );
5138                if (isWord)
5139                   assign( 'I', pce, dstv, schemeEw_Atom( pce, e ) );
5140                break;
5141             }
5142 
5143             case Iex_CCall: {
5144                stmt( 'C', pce, st );
5145                if (isWord)
5146                   assign( 'I', pce, dstv,
5147                           mkexpr( gen_call_nonptr_or_unknown_w(
5148                                      pce, mkexpr(dst))));
5149                break;
5150             }
5151 
5152             case Iex_Mux0X: {
5153                /* Just steer the shadow values in the same way as the
5154                   originals. */
5155                stmt( 'C', pce, st );
5156                if (isWord)
5157                   assign( 'I', pce, dstv,
5158                           IRExpr_Mux0X(
5159                              e->Iex.Mux0X.cond,
5160                              schemeEw_Atom( pce, e->Iex.Mux0X.expr0 ),
5161                              schemeEw_Atom( pce, e->Iex.Mux0X.exprX ) ));
5162                break;
5163             }
5164 
5165             case Iex_RdTmp: {
5166                stmt( 'C', pce, st );
5167                if (isWord)
5168                   assign( 'I', pce, dstv, schemeEw_Atom( pce, e ));
5169                break;
5170             }
5171 
5172             case Iex_Load: {
5173                schemeS_load( pce, e->Iex.Load.addr, e_ty, dstv );
5174                /* copy the original -- must happen after the helper call */
5175                stmt( 'C', pce, st );
5176                break;
5177             }
5178 
5179             case Iex_GetI: {
5180                IRRegArray* descr = e->Iex.GetI.descr;
5181                stmt( 'C', pce, st );
5182                tl_assert(descr && descr->elemTy);
5183                if (is_integer_guest_reg_array(descr)) {
5184                   /* if this fails, is_integer_guest_reg_array is
5185                      returning bogus results */
5186                   tl_assert(isWord);
5187                   assign(
5188                      'I', pce, dstv,
5189                      IRExpr_GetI(
5190                         mkIRRegArray(descr->base + pce->guest_state_sizeB,
5191                                      descr->elemTy, descr->nElems),
5192                         e->Iex.GetI.ix,
5193                         e->Iex.GetI.bias
5194                      )
5195                   );
5196                }
5197                break;
5198             }
5199 
5200             case Iex_Get: {
5201                stmt( 'C', pce, st );
5202                if (isWord) {
5203                   /* guest-word-typed tmp assignment, so it will have a
5204                      shadow tmp, and we must make an assignment to
5205                      that */
5206                   if (is_integer_guest_reg(e->Iex.Get.offset,
5207                                            sizeofIRType(e->Iex.Get.ty))) {
5208                      assign( 'I', pce, dstv,
5209                              IRExpr_Get( e->Iex.Get.offset
5210                                             + pce->guest_state_sizeB,
5211                                          e->Iex.Get.ty) );
5212                   } else {
5213                      if (pce->hWordTy == Ity_I32) {
5214                         assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
5215                      } else {
5216                         assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
5217                      }
5218                   }
5219                } else {
5220                   /* tmp isn't guest-word-typed, so isn't shadowed, so
5221                      generate no instrumentation */
5222                }
5223                break;
5224             }
5225 
5226             case Iex_Unop: {
5227                stmt( 'C', pce, st );
5228                tl_assert(isIRAtom(e->Iex.Unop.arg));
5229                if (isWord)
5230                   instrument_arithop( pce, dst, dstv, e->Iex.Unop.op,
5231                                       e->Iex.Unop.arg,
5232                                       NULL, NULL, NULL );
5233                break;
5234             }
5235 
5236             case Iex_Binop: {
5237                stmt( 'C', pce, st );
5238                tl_assert(isIRAtom(e->Iex.Binop.arg1));
5239                tl_assert(isIRAtom(e->Iex.Binop.arg2));
5240                if (isWord)
5241                   instrument_arithop( pce, dst, dstv, e->Iex.Binop.op,
5242                                       e->Iex.Binop.arg1, e->Iex.Binop.arg2,
5243                                       NULL, NULL );
5244                break;
5245             }
5246 
5247             case Iex_Triop: {
5248                stmt( 'C', pce, st );
5249                tl_assert(isIRAtom(e->Iex.Triop.arg1));
5250                tl_assert(isIRAtom(e->Iex.Triop.arg2));
5251                tl_assert(isIRAtom(e->Iex.Triop.arg3));
5252                if (isWord)
5253                   instrument_arithop( pce, dst, dstv, e->Iex.Triop.op,
5254                                       e->Iex.Triop.arg1, e->Iex.Triop.arg2,
5255                                       e->Iex.Triop.arg3, NULL );
5256                break;
5257             }
5258 
5259             case Iex_Qop: {
5260                stmt( 'C', pce, st );
5261                tl_assert(isIRAtom(e->Iex.Qop.arg1));
5262                tl_assert(isIRAtom(e->Iex.Qop.arg2));
5263                tl_assert(isIRAtom(e->Iex.Qop.arg3));
5264                tl_assert(isIRAtom(e->Iex.Qop.arg4));
5265                if (isWord)
5266                   instrument_arithop( pce, dst, dstv, e->Iex.Qop.op,
5267                                       e->Iex.Qop.arg1, e->Iex.Qop.arg2,
5268                                       e->Iex.Qop.arg3, e->Iex.Qop.arg4 );
5269                break;
5270             }
5271 
5272             default:
5273                goto unhandled;
5274          } /* switch (e->tag) */
5275 
5276          break;
5277 
5278       } /* case Ist_WrTmp */
5279 
5280       default:
5281       unhandled:
5282          ppIRStmt(st);
5283          tl_assert(0);
5284    }
5285 }
5286 
5287 
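/* Callback handed to sg_instrument_init: allocates a fresh,
   unshadowed temp in our IRSB on sg_main's behalf, so the sg_
   instrumenter needs no knowledge of PCEnv. */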
5288 static IRTemp for_sg__newIRTemp_cb ( IRType ty, void* opaque )
5289 {
5290    PCEnv* pce = (PCEnv*)opaque;
5291    return newTemp( pce, ty, NonShad );
5292 }
5293 
5294 
5295 IRSB* h_instrument ( VgCallbackClosure* closure,
5296                      IRSB* sbIn,
5297                      VexGuestLayout* layout,
5298                      VexGuestExtents* vge,
5299                      IRType gWordTy, IRType hWordTy )
5300 {
5301    Bool  verboze = 0||False;
5302    Int   i /*, j*/;
5303    PCEnv pce;
5304    struct _SGEnv* sgenv;
5305 
5306    if (gWordTy != hWordTy) {
5307       /* We don't currently support this case. */
5308       VG_(tool_panic)("host/guest word size mismatch");
5309    }
5310 
5311    /* Check we're not completely nuts */
5312    tl_assert(sizeof(UWord)  == sizeof(void*));
5313    tl_assert(sizeof(Word)   == sizeof(void*));
5314    tl_assert(sizeof(Addr)   == sizeof(void*));
5315    tl_assert(sizeof(ULong)  == 8);
5316    tl_assert(sizeof(Long)   == 8);
5317    tl_assert(sizeof(Addr64) == 8);
5318    tl_assert(sizeof(UInt)   == 4);
5319    tl_assert(sizeof(Int)    == 4);
5320 
5321    /* Set up the running environment.  Both .sb and .tmpMap are
5322       modified as we go along.  Note that tmps are added to both
5323       .sb->tyenv and .tmpMap together, so the valid index-set for
5324       those two arrays should always be identical. */
5325    VG_(memset)(&pce, 0, sizeof(pce));
5326    pce.sb                = deepCopyIRSBExceptStmts(sbIn);
5327    pce.trace             = verboze;
5328    pce.hWordTy           = hWordTy;
5329    pce.gWordTy           = gWordTy;
5330    pce.guest_state_sizeB = layout->total_sizeB;
5331 
5332    pce.qmpMap = VG_(newXA)( VG_(malloc), "pc.h_instrument.1", VG_(free),
5333                             sizeof(TempMapEnt));
5334    for (i = 0; i < sbIn->tyenv->types_used; i++) {
5335       TempMapEnt ent;
5336       ent.kind   = NonShad;
5337       ent.shadow = IRTemp_INVALID;
5338       VG_(addToXA)( pce.qmpMap, &ent );
5339    }
5340    tl_assert( VG_(sizeXA)( pce.qmpMap ) == sbIn->tyenv->types_used );
5341 
5342    /* Also set up for the sg_ instrumenter.  See comments at the top
5343       of this instrumentation section for details.  The two parameters
5344       constitute a closure, which sg_ can use to correctly generate
5345       new IRTemps as needed. */
5346    sgenv = sg_instrument_init( for_sg__newIRTemp_cb,
5347                                (void*)&pce );
5348 
5349    /* Stay sane.  These two should agree! */
5350    tl_assert(layout->total_sizeB == PC_SIZEOF_GUEST_STATE);
5351 
5352    /* Copy verbatim any IR preamble preceding the first IMark */
5353 
5354    i = 0;
5355    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
5356       IRStmt* st = sbIn->stmts[i];
5357       tl_assert(st);
5358       tl_assert(isFlatIRStmt(st));
5359       stmt( 'C', &pce, sbIn->stmts[i] );
5360       i++;
5361    }
5362 
5363    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
5364       cause the IR following the preamble to contain references to IR
5365       temporaries defined in the preamble.  Because the preamble isn't
5366       instrumented, these temporaries don't have any shadows.
5367       Nevertheless uses of them following the preamble will cause
5368       the instrumenter to generate references to their shadows.  End effect is
5369       to cause IR sanity check failures, due to references to
5370       non-existent shadows.  This is only evident for the complex
5371       preambles used for function wrapping on TOC-afflicted platforms
5372       (ppc64-linux, ppc32-aix5, ppc64-aix5).
5373 
5374       The following loop therefore scans the preamble looking for
5375       assignments to temporaries.  For each one found it creates an
5376       assignment to the corresponding shadow temp, marking it as
5377       'defined'.  This is the same resulting IR as if the main
5378       instrumentation loop before had been applied to the statement
5379       'tmp = CONSTANT'.
5380    */
5381 #if 0
5382    // FIXME: this isn't exactly right; only needs to generate shadows
5383    // for guest-word-typed temps
5384    for (j = 0; j < i; j++) {
5385       if (sbIn->stmts[j]->tag == Ist_WrTmp) {
5386          /* findShadowTmpV checks its arg is an original tmp;
5387             no need to assert that here. */
5388          IRTemp tmp_o = sbIn->stmts[j]->Ist.WrTmp.tmp;
5389          IRTemp tmp_s = findShadowTmp(&pce, tmp_o);
5390          IRType ty_s  = typeOfIRTemp(sbIn->tyenv, tmp_s);
5391          assign( 'V', &pce, tmp_s, definedOfType( ty_s ) );
5392          if (0) {
5393             VG_(printf)("create shadow tmp for preamble tmp [%d] ty ", j);
5394             ppIRType( ty_s );
5395             VG_(printf)("\n");
5396          }
5397       }
5398    }
5399 #endif
5400 
5401    /* Iterate over the remaining stmts to generate instrumentation. */
5402 
5403    tl_assert(sbIn->stmts_used > 0);
5404    tl_assert(i >= 0);
5405    tl_assert(i < sbIn->stmts_used);
5406    tl_assert(sbIn->stmts[i]->tag == Ist_IMark);
5407 
5408    for (/*use current i*/; i < sbIn->stmts_used; i++) {
5409       /* generate sg_ instrumentation for this stmt */
5410       sg_instrument_IRStmt( sgenv, pce.sb, sbIn->stmts[i],
5411                             layout, gWordTy, hWordTy );
5412       /* generate h_ instrumentation for this stmt */
5413       schemeS( &pce, sbIn->stmts[i] );
5414    }
5415 
5416    /* generate sg_ instrumentation for the final jump */
5417    sg_instrument_final_jump( sgenv, pce.sb, sbIn->next, sbIn->jumpkind,
5418                              layout, gWordTy, hWordTy );
5419 
5420    /* and finalise .. */
5421    sg_instrument_fini( sgenv );
5422 
5423    /* If this fails, there's been some serious snafu with tmp management
5424       that should be investigated. */
5425    tl_assert( VG_(sizeXA)( pce.qmpMap ) == pce.sb->tyenv->types_used );
5426    VG_(deleteXA)( pce.qmpMap );
5427 
5428    return pce.sb;
5429 }
5430 
5431 
5432 /*--------------------------------------------------------------------*/
5433 /*--- Initialisation                                               ---*/
5434 /*--------------------------------------------------------------------*/
5435 
5436 void h_pre_clo_init ( void )
5437 {
5438    // Other initialisation
5439    init_shadow_memory();
5440    init_lossage();
5441 }
5442 
5443 void h_post_clo_init ( void )
5444 {
5445 }
5446 
5447 /*--------------------------------------------------------------------*/
5448 /*--- Finalisation                                                 ---*/
5449 /*--------------------------------------------------------------------*/
5450 
5451 void h_fini ( Int exitcode )
5452 {
5453    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
5454       VG_(message)(Vg_UserMsg,
5455                    "For counts of detected and suppressed errors, "
5456                    "rerun with: -v\n");
5457    }
5458 
5459    if (VG_(clo_stats)) {
5460       VG_(message)(Vg_DebugMsg,
5461                    "  h_:  %'10llu client allocs, %'10llu client frees\n",
5462                    stats__client_mallocs, stats__client_frees);
5463       VG_(message)(Vg_DebugMsg,
5464                    "  h_:  %'10llu Segs allocd,   %'10llu Segs recycled\n",
5465                    stats__segs_allocd, stats__segs_recycled);
5466    }
5467 
5468 #if 0
5469    if (h_clo_lossage_check) {
5470       VG_(message)(Vg_UserMsg, "\n");
5471       VG_(message)(Vg_UserMsg, "%12lld total memory references\n",
5472                                stats__tot_mem_refs);
5473       VG_(message)(Vg_UserMsg, "%12lld   of which are in a known segment\n",
5474                                stats__refs_in_a_seg);
5475       VG_(message)(Vg_UserMsg, "%12lld   of which are 'lost' w.r.t the seg\n",
5476                                stats__refs_lost_seg);
5477       VG_(message)(Vg_UserMsg, "\n");
5478       show_lossage();
5479       VG_(message)(Vg_UserMsg, "\n");
5480    } else {
5481       tl_assert( 0 == VG_(OSetGen_Size)(lossage) );
5482    }
5483 #endif
5484 }
5485 
5486 
5487 /*--------------------------------------------------------------------*/
5488 /*--- end                                                 h_main.c ---*/
5489 /*--------------------------------------------------------------------*/
5490