
/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
/*--- accessibility (A) and validity (V) status of each byte.      ---*/
/*---                                                    mc_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2011 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */


/* We really want this frame-pointer-less on all platforms, since the
   helper functions are small and called very frequently.  By default
   on x86-linux, though, Makefile.all.am doesn't specify it, so do it
   here.  Requires gcc >= 4.4, unfortunately. */
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
# pragma GCC optimize("-fomit-frame-pointer")
#endif


/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */


/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

#define PERF_FAST_LOADV    1
#define PERF_FAST_STOREV   1

#define PERF_FAST_SARP     1

#define PERF_FAST_STACK    1
#define PERF_FAST_STACK2   1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0


/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */


/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   for memory.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/

/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each of size
   64k bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^22 entries (indexed
   using bits 16..37 of the address); this covers the bottom 256GB.  Any
   accesses above 256GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 256GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2);  most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
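
/* A worked example may make the 32-bit layout concrete.  This is an
   illustrative sketch only (the numbers follow from the description
   above and from SM_OFF/SM_CHUNKS defined below):

      a = 0x12345678
      primary map index        = a >> 16            = 0x1234
      byte offset within chunk = a & 0xFFFF         = 0x5678
      vabits8 index (SM_OFF)   = (a & 0xFFFF) >> 2  = 0x159E
      bit shift within vabits8 = (a & 3) << 1       = 0

   ie. the state of the byte at 0x12345678 lives in bits [1..0] of
   sm->vabits8[0x159E] of the secondary map that primary_map[0x1234]
   points to. */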

/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 256G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  22

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
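
/* For concreteness (a sketch derived from the two definitions above, not
   part of the original source): on 32-bit builds N_PRIMARY_MAP is 2^16
   and MAX_PRIMARY_ADDRESS is 0xFFFFFFFF, ie. the whole 4GB space; on
   64-bit builds N_PRIMARY_MAP is 2^22 and MAX_PRIMARY_ADDRESS is
   0x3FFFFFFFFF, ie. 256GB - 1. */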


/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses;  in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.

// These represent 8 bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
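
// An illustrative sketch (not part of the original defines): the vabits8
// value for a 4-byte group whose bytes are in mixed states.  If byte a+0
// is defined, a+1 undefined, a+2 noaccess and a+3 partdefined, then,
// following the bit layout described above:
//
//    vabits8 = (VA_BITS2_PARTDEFINED << 6) | (VA_BITS2_NOACCESS << 4)
//            | (VA_BITS2_UNDEFINED   << 2) | (VA_BITS2_DEFINED  << 0)
//            = 0xC6                         // 11_00_01_10b
//
// (and the full V bits for byte a+3 would live in the sec-V-bits table).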


#define SM_CHUNKS             16384
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
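
// Reading the two macros above: SM_OFF(a) indexes the vabits8 chunk that
// covers a's 4-byte group, and SM_OFF_16(a) indexes the 16-bit chunk (a
// pair of vabits8) covering its 8-byte group.  Both use only the low 16
// bits of the address, so the caller must already have located the right
// secondary map.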

// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))

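// Note on SM_MASK/SM_SIZE (used below): SM_MASK selects the offset within
// a secondary map's 64kB coverage, so ~SM_MASK rounds an address down to
// the start of its sec-map, and SM_SIZE is the 64kB sec-map size.  Neither
// is defined in this file, so they presumably come in via an include.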
static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

typedef
   struct {
      UChar vabits8[SM_CHUNKS];
   }
   SecMap;

// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}

// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);

/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
          || dist_sm == &sm_distinguished[1]
          || dist_sm == &sm_distinguished[2]);

   new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (new_sm == NULL)
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap) );
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}

/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2. And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;

static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                           n_deissued_SMs ++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                           n_issued_SMs   ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}

/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
static SecMap* primary_map[N_PRIMARY_MAP];


/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12
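
/* A sketch of the intended L1 behaviour, inferred from the code below:
   a hit at L1 position i swaps entries i and i-1 (a "transpose"
   self-organising heuristic), so frequently-used entries migrate towards
   slot 0, while an entry found only in L2 is re-inserted into L1 at
   position AUXMAP_L1_INSERT_IX, shifting the tail entries down by one.
   Inserting in the middle rather than at the front means a one-off
   lookup cannot immediately evict the hottest entries. */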

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;

static void init_auxmap_L1_L2 ( void )
{
   Int i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}

/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary maps. */

static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            and .base & 0xFFFF == 0
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = 0;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}

static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}

static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}

static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   tl_assert(nyu);
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}

/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}

/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
}

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}

// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// *** WARNING! ***
// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// *** WARNING! ***
// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm       = get_secmap_for_reading(a);
   UWord   sm_off   = SM_OFF(a);
   UChar   vabits8  = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm       = get_secmap_for_writing(a);
   UWord   sm_off   = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}


// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}

// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
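
// An illustrative round trip through the pair above (a sketch; 0 V bits
// mean "defined", 1 bits mean "undefined"): set_vbits8(a, 0xF0) finds a
// addressable, sees 0xF0 is neither fully defined nor fully undefined,
// so marks a as VA_BITS2_PARTDEFINED and stores 0xF0 in the sec-V-bits
// table; a later get_vbits8(a, &v) sees PARTDEFINED and fetches v == 0xF0
// back out of the table.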


/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
// are stale and haven't been touched for a certain number of collections.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// (So this is a bit different from a traditional GC, where you definitely
// want to remove any dead nodes.  It's more like we have a resizable cache
// and we're trying to find the right balance between how many elements to
// evict and how big to make the cache.)
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we
// just removed all stale nodes as soon as possible, we would end up
// re-adding a lot of them later.  The "sufficiently stale" approach avoids
// this.  (If a program has many live PDBs, performance will just suck;
// there's no way around that.)

static OSet* secVBitTable;

// Stats
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one node if they are close to each other,
// reducing the total number of nodes.  In practice sometimes they are
// clustered (eg. perf/bz2 repeatedly writes then reads more than 20,000
// of them in a contiguous row), but often not.  So we choose something
// intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16

// We make the table bigger if more than this many nodes survive a GC.
#define MAX_SURVIVOR_PROPORTION  0.5

// Each time we make the table bigger, we increase it by this much.
#define TABLE_GROWTH_FACTOR      2

// This defines "sufficiently stale" -- any node that hasn't been touched in
// this many GCs will be removed.
#define MAX_STALE_AGE            2
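
// Putting the knobs above together (an illustration of the GC code
// below): a node is evicted at a GC only if it has not been touched for
// more than MAX_STALE_AGE collections *and* none of its
// BYTES_PER_SEC_VBIT_NODE bytes is still marked VA_BITS2_PARTDEFINED in
// the main shadow memory.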

// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1024;

// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
      UInt  last_touched;
   }
   SecVBitNode;

static OSet* createSecVBitTable(void)
{
   return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
                               NULL, // use fast comparisons
                               VG_(malloc), "mc.cSVT.1 (sec VBit table)",
                               VG_(free) );
}

static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      Bool keep = False;
      if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
         // Keep node if it's been touched recently enough (regardless of
         // freshness/staleness).
         keep = True;
      } else {
         // Keep node if any of its bytes are non-stale.  Using
         // get_vabits2() for the lookup is not very efficient, but I don't
         // think it matters.
         for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
            if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
               keep = True;      // Found a non-stale byte, so keep
               break;
            }
         }
      }

      if ( keep ) {
         // Insert a copy of the node into the new table.
         SecVBitNode* n2 =
            VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
         *n2 = *n;
         VG_(OSetGen_Insert)(secVBitTable2, n2);
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1) {
      Char percbuf[6];
      VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
                   n_nodes, n_survivors, percbuf);
   }

   // Increase table size if necessary.
   if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
      secVBitLimit *= TABLE_GROWTH_FACTOR;
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
                      secVBitLimit);
   }
}

static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}

static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      n->last_touched = GCs_done;
      sec_vbits_updates++;
   } else {
      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a            = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;
      n->last_touched = GCs_done;

      // Do a table GC if necessary.  Nb: do this before inserting the new
      // node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}

/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th least significant byte
   in a wordszB-sized word, given the specified endianness. */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
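
/* Quick check of the helper (illustrative): in a 4-byte word, byteno 0
   is the least significant byte, so
      byte_offset_w(4, False, 0) == 0   little-endian: LSB at lowest addr
      byte_offset_w(4, True,  0) == 3   big-endian:    LSB at highest addr
   which is why mc_LOADVn_slow below can walk bytenos szB-1..0 and always
   assemble the most significant byte first. */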


/* --------------- Ignored address ranges --------------- */

#define M_IGNORE_RANGES 4

typedef
   struct {
      Int  used;
      Addr start[M_IGNORE_RANGES];
      Addr end[M_IGNORE_RANGES];
   }
   IgnoreRanges;

static IgnoreRanges ignoreRanges;

INLINE Bool MC_(in_ignored_range) ( Addr a )
{
   Int i;
   if (LIKELY(ignoreRanges.used == 0))
      return False;
   for (i = 0; i < ignoreRanges.used; i++) {
      if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
         return True;
   }
   return False;
}

/* Parse two Addr separated by a dash, or fail. */

static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}

/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail. */

static Bool parse_ignore_ranges ( UChar* str0 )
{
   Addr start, end;
   Bool ok;
   UChar*  str = str0;
   UChar** ppc = &str;
   ignoreRanges.used = 0;
   while (1) {
      ok = parse_range(ppc, &start, &end);
      if (!ok)
         return False;
      if (ignoreRanges.used >= M_IGNORE_RANGES)
         return False;
      ignoreRanges.start[ignoreRanges.used] = start;
      ignoreRanges.end[ignoreRanges.used] = end;
      ignoreRanges.used++;
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
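
/* Examples of inputs the parser above accepts and rejects (illustrative,
   derived from the grammar in parse_range/parse_ignore_ranges):

      "0x10000-0x20000"                    accepted: one range
      "0x10000-0x20000,0x30000-0x40000"    accepted: two ranges
      "0x10000-0x20000,"                   rejected: trailing comma
      "0x10000"                            rejected: missing "-end" part

   and at most M_IGNORE_RANGES ranges are accepted in total. */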


/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   /* Make up a 64-bit result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least. */
   ULong vbits64     = V_BITS64_UNDEFINED;
   SizeT szB         = nBits / 8;
   SSizeT i;                        // Must be signed.
   SizeT n_addrs_bad = 0;
   Addr  ai;
   Bool  partial_load_exemption_applies;
   UChar vbits8;
   Bool  ok;

   PROF_EVENT(30, "mc_LOADVn_slow");

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  They are merely a speedup hack; they
      can be omitted without loss of correctness/functionality.  Note that
      in both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm = get_secmap_for_reading(a);
      UWord sm_off = SM_OFF(a);
      UWord vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(31, "mc_LOADVn_slow(loop)");
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      if (!ok) n_addrs_bad++;
      vbits64 <<= 8;
      vbits64 |= vbits8;
   }

   /* This is a hack which avoids producing errors for code which
      insists on stepping along byte strings in aligned word-sized
      chunks, and there is a partially defined word at the end.  (eg,
      optimised strlen).  Such code is basically broken at least WRT
      semantics of ANSI C, but sometimes users don't have the option
      to fix it, and so this option is provided.  Note it is now
      defaulted to not-engaged.

      A load from a partially-addressable place is allowed if:
      - the command-line flag is set
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid
   */
   partial_load_exemption_applies
      = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
                                   && VG_IS_WORD_ALIGNED(a)
                                   && n_addrs_bad < VG_WORDSIZE;

   if (n_addrs_bad > 0 && !partial_load_exemption_applies)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}


static
__attribute__((noinline))
void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
{
   SizeT szB = nBits / 8;
   SizeT i, n_addrs_bad = 0;
   UChar vbits8;
   Addr  ai;
   Bool  ok;

   PROF_EVENT(35, "mc_STOREVn_slow");

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  They are merely a speedup hack; they
      can be omitted without loss of correctness/functionality.  Note that
      in both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from STOREV64 and STOREV32.
   */
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 64 && VG_IS_8_ALIGNED(a))) {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS16_DEFINED   == vabits16 ||
                           VA_BITS16_UNDEFINED == vabits16) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS64_DEFINED == vbytes)) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
            return;
         } else if (V_BITS64_UNDEFINED == vbytes) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   if (LIKELY(sizeof(void*) == 8
                      && nBits == 32 && VG_IS_4_ALIGNED(a))) {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS8_DEFINED   == vabits8 ||
                           VA_BITS8_UNDEFINED == vabits8) )) {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
            sm->vabits8[sm_off] = VA_BITS8_DEFINED;
            return;
         } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
            sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
            return;
         }
         /* else fall into the slow case */
      }
      /* else fall into the slow case */
   }
   /* ------------ END semi-fast cases ------------ */

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Dump vbytes in memory, iterating from least to most significant
      byte.  At the same time establish addressability of the location. */
   for (i = 0; i < szB; i++) {
      PROF_EVENT(36, "mc_STOREVn_slow(loop)");
      ai     = a + byte_offset_w(szB, bigendian, i);
      vbits8 = vbytes & 0xff;
      ok     = set_vbits8(ai, vbits8);
      if (!ok) n_addrs_bad++;
      vbytes >>= 8;
   }

   /* If an address error has happened, report it. */
   if (n_addrs_bad > 0)
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
}


/*------------------------------------------------------------*/
/*--- Setting permissions over address ranges.             ---*/
/*------------------------------------------------------------*/

static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
                                      UWord dsm_num )
{
   UWord    sm_off, sm_off16;
   UWord    vabits2 = vabits16 & 0x3;
   SizeT    lenA, lenB, len_to_next_secmap;
   Addr     aNext;
   SecMap*  sm;
   SecMap** sm_ptr;
   SecMap*  example_dsm;

   PROF_EVENT(150, "set_address_range_perms");

   /* Check the V+A bits make sense. */
   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
             VA_BITS16_UNDEFINED == vabits16 ||
             VA_BITS16_DEFINED   == vabits16);

   // This code should never write PDBs;  ensure this.  (See comment above
   // set_vabits2().)
   tl_assert(VA_BITS2_PARTDEFINED != vabits2);

   if (lenT == 0)
      return;

   if (lenT > 256 * 1024 * 1024) {
      if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
         Char* s = "unknown???";
         if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
         if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
         if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
         VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
                                  "large range [0x%lx, 0x%lx) (%s)\n",
                                  a, a + lenT, s);
      }
   }

#ifndef PERF_FAST_SARP
   /*------------------ debug-only case ------------------ */
   {
      // Endianness doesn't matter here because all bytes are being set to
      // the same value.
      // Nb: We don't have to worry about updating the sec-V-bits table
      // after these set_vabits2() calls because this code never writes
      // VA_BITS2_PARTDEFINED values.
      SizeT i;
      for (i = 0; i < lenT; i++) {
         set_vabits2(a + i, vabits2);
      }
      return;
   }
#endif

   /*------------------ standard handling ------------------ */

   /* Get the distinguished secondary that we might want
      to use (part of the space-compression scheme). */
   example_dsm = &sm_distinguished[dsm_num];

   // We have to handle ranges covering various combinations of partial and
   // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   // Cases marked with a '*' are common.
   //
   //   TYPE                                             PARTS USED
   //   ----                                             ----------
   // * one partial sec-map                  (p)         1
   // - one whole sec-map                    (P)         2
   //
   // * two partial sec-maps                 (pp)        1,3
   // - one partial, one whole sec-map       (pP)        1,2
   // - one whole, one partial sec-map       (Pp)        2,3
   // - two whole sec-maps                   (PP)        2,2
   //
   // * one partial, one whole, one partial  (pPp)       1,2,3
   // - one partial, two whole               (pPP)       1,2,2
   // - two whole, one partial               (PPp)       2,2,3
   // - three whole                          (PPP)       2,2,2
   //
   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2
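   //
   // An illustrative instance (not from the original comment): a 100-byte
   // range starting 40 bytes before a sec-map boundary is case "pp" --
   // Part 1 below handles the first 40 bytes (lenA), Part 2 is skipped
   // because the remaining lenB == 60 is less than SM_SIZE, and Part 3
   // handles the final 60 bytes.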
1369    // Break up total length (lenT) into two parts:  length in the first
1370    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1371    aNext = start_of_this_sm(a) + SM_SIZE;
1372    len_to_next_secmap = aNext - a;
1373    if ( lenT <= len_to_next_secmap ) {
1374       // Range entirely within one sec-map.  Covers almost all cases.
1375       PROF_EVENT(151, "set_address_range_perms-single-secmap");
1376       lenA = lenT;
1377       lenB = 0;
1378    } else if (is_start_of_sm(a)) {
1379       // Range spans at least one whole sec-map, and starts at the beginning
1380       // of a sec-map; skip to Part 2.
1381       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1382       lenA = 0;
1383       lenB = lenT;
1384       goto part2;
1385    } else {
1386       // Range spans two or more sec-maps, first one is partial.
1387       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1388       lenA = len_to_next_secmap;
1389       lenB = lenT - lenA;
1390    }
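
   // Worked example (illustrative; assumes the 64KB sec-maps used
   // below, i.e. SM_SIZE == 0x10000).  For a == 0x1FFF0 and
   // lenT == 0x20010:  aNext == 0x20000, len_to_next_secmap == 0x10,
   // hence lenA == 0x10 (handled by Part 1) and lenB == 0x20000
   // (two whole sec-maps, handled by Part 2; Part 3 is not reached).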
1391 
1392    //------------------------------------------------------------------------
1393    // Part 1: Deal with the first sec-map.  Most of the time the range will be
1394    // entirely within a sec-map and this part alone will suffice.  Also,
1395    // doing it this way lets us avoid repeatedly testing for the crossing of
1396    // a sec-map boundary within these loops.
1397    //------------------------------------------------------------------------
1398 
1399    // If it's distinguished, make it undistinguished if necessary.
1400    sm_ptr = get_secmap_ptr(a);
1401    if (is_distinguished_sm(*sm_ptr)) {
1402       if (*sm_ptr == example_dsm) {
1403          // Sec-map already has the V+A bits that we want, so skip.
1404          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1405          a    = aNext;
1406          lenA = 0;
1407       } else {
1408          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1409          *sm_ptr = copy_for_writing(*sm_ptr);
1410       }
1411    }
1412    sm = *sm_ptr;
1413 
1414    // 1 byte steps
1415    while (True) {
1416       if (VG_IS_8_ALIGNED(a)) break;
1417       if (lenA < 1)           break;
1418       PROF_EVENT(156, "set_address_range_perms-loop1a");
1419       sm_off = SM_OFF(a);
1420       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1421       a    += 1;
1422       lenA -= 1;
1423    }
1424    // 8-aligned, 8 byte steps
1425    while (True) {
1426       if (lenA < 8) break;
1427       PROF_EVENT(157, "set_address_range_perms-loop8a");
1428       sm_off16 = SM_OFF_16(a);
1429       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1430       a    += 8;
1431       lenA -= 8;
1432    }
1433    // 1 byte steps
1434    while (True) {
1435       if (lenA < 1) break;
1436       PROF_EVENT(158, "set_address_range_perms-loop1b");
1437       sm_off = SM_OFF(a);
1438       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1439       a    += 1;
1440       lenA -= 1;
1441    }
1442 
1443    // We've finished the first sec-map.  Is that it?
1444    if (lenB == 0)
1445       return;
1446 
1447    //------------------------------------------------------------------------
1448    // Part 2: Fast-set entire sec-maps at a time.
1449    //------------------------------------------------------------------------
1450   part2:
1451    // 64KB-aligned, 64KB steps.
1452    // Nb: we can reach here with lenB < SM_SIZE
1453    tl_assert(0 == lenA);
1454    while (True) {
1455       if (lenB < SM_SIZE) break;
1456       tl_assert(is_start_of_sm(a));
1457       PROF_EVENT(159, "set_address_range_perms-loop64K");
1458       sm_ptr = get_secmap_ptr(a);
1459       if (!is_distinguished_sm(*sm_ptr)) {
1460          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1461          // Free the non-distinguished sec-map that we're replacing.  This
1462          // case happens moderately often, enough to be worthwhile.
1463          VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1464       }
1465       update_SM_counts(*sm_ptr, example_dsm);
1466       // Make the sec-map entry point to the example DSM
1467       *sm_ptr = example_dsm;
1468       lenB -= SM_SIZE;
1469       a    += SM_SIZE;
1470    }
1471 
1472    // We've finished the whole sec-maps.  Is that it?
1473    if (lenB == 0)
1474       return;
1475 
1476    //------------------------------------------------------------------------
1477    // Part 3: Finish off the final partial sec-map, if necessary.
1478    //------------------------------------------------------------------------
1479 
1480    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1481 
1482    // If it's distinguished, make it undistinguished if necessary.
1483    sm_ptr = get_secmap_ptr(a);
1484    if (is_distinguished_sm(*sm_ptr)) {
1485       if (*sm_ptr == example_dsm) {
1486          // Sec-map already has the V+A bits that we want, so stop.
1487          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1488          return;
1489       } else {
1490          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1491          *sm_ptr = copy_for_writing(*sm_ptr);
1492       }
1493    }
1494    sm = *sm_ptr;
1495 
1496    // 8-aligned, 8 byte steps
1497    while (True) {
1498       if (lenB < 8) break;
1499       PROF_EVENT(163, "set_address_range_perms-loop8b");
1500       sm_off16 = SM_OFF_16(a);
1501       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1502       a    += 8;
1503       lenB -= 8;
1504    }
1505    // 1 byte steps
1506    while (True) {
1507       if (lenB < 1) return;
1508       PROF_EVENT(164, "set_address_range_perms-loop1c");
1509       sm_off = SM_OFF(a);
1510       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1511       a    += 1;
1512       lenB -= 1;
1513    }
1514 }
1515 
1516 
1517 /* --- Set permissions for arbitrary address ranges --- */
1518 
1519 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1520 {
1521    PROF_EVENT(40, "MC_(make_mem_noaccess)");
1522    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1523    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1524    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1525       ocache_sarp_Clear_Origins ( a, len );
1526 }
1527 
1528 static void make_mem_undefined ( Addr a, SizeT len )
1529 {
1530    PROF_EVENT(41, "make_mem_undefined");
1531    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1532    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1533 }
1534 
1535 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1536 {
1537    PROF_EVENT(41, "MC_(make_mem_undefined)");
1538    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1539    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1540    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1541       ocache_sarp_Set_Origins ( a, len, otag );
1542 }
1543 
1544 static
1545 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1546                                           ThreadId tid, UInt okind )
1547 {
1548    UInt        ecu;
1549    ExeContext* here;
1550    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1551       if it is invalid.  So no need to do it here. */
1552    tl_assert(okind <= 3);
1553    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1554    tl_assert(here);
1555    ecu = VG_(get_ECU_from_ExeContext)(here);
1556    tl_assert(VG_(is_plausible_ECU)(ecu));
1557    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1558 }
1559 
1560 static
1561 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1562    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1563 }
1564 
1565 
1566 void MC_(make_mem_defined) ( Addr a, SizeT len )
1567 {
1568    PROF_EVENT(42, "MC_(make_mem_defined)");
1569    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1570    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1571    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1572       ocache_sarp_Clear_Origins ( a, len );
1573 }
1574 
1575 /* For each byte in [a,a+len), if the byte is addressable, make it be
1576    defined, but if it isn't addressable, leave it alone.  In other
1577    words, a version of MC_(make_mem_defined) that doesn't mess with
1578    addressability.  Low-performance implementation. */
1579 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1580 {
1581    SizeT i;
1582    UChar vabits2;
1583    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1584    for (i = 0; i < len; i++) {
1585       vabits2 = get_vabits2( a+i );
1586       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1587          set_vabits2(a+i, VA_BITS2_DEFINED);
1588          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1589             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1590          }
1591       }
1592    }
1593 }
1594 
1595 /* Similarly (needed for mprotect handling ..) */
1596 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1597 {
1598    SizeT i;
1599    UChar vabits2;
1600    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1601    for (i = 0; i < len; i++) {
1602       vabits2 = get_vabits2( a+i );
1603       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1604          set_vabits2(a+i, VA_BITS2_DEFINED);
1605          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1606             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1607          }
1608       }
1609    }
1610 }
1611 
1612 /* --- Block-copy permissions (needed for implementing realloc() and
1613        sys_mremap). --- */
1614 
1615 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1616 {
1617    SizeT i, j;
1618    UChar vabits2, vabits8;
1619    Bool  aligned, nooverlap;
1620 
1621    DEBUG("MC_(copy_address_range_state)\n");
1622    PROF_EVENT(50, "MC_(copy_address_range_state)");
1623 
1624    if (len == 0 || src == dst)
1625       return;
1626 
1627    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1628    nooverlap = src+len <= dst || dst+len <= src;
1629 
1630    if (nooverlap && aligned) {
1631 
1632       /* Vectorised fast case, when no overlap and suitably aligned */
1633       /* vector loop */
1634       i = 0;
1635       while (len >= 4) {
1636          vabits8 = get_vabits8_for_aligned_word32( src+i );
1637          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1638          if (LIKELY(VA_BITS8_DEFINED == vabits8
1639                             || VA_BITS8_UNDEFINED == vabits8
1640                             || VA_BITS8_NOACCESS == vabits8)) {
1641             /* do nothing */
1642          } else {
1643             /* have to copy secondary map info */
1644             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1645                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1646             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1647                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1648             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1649                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1650             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1651                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1652          }
1653          i += 4;
1654          len -= 4;
1655       }
1656       /* fixup loop */
1657       while (len >= 1) {
1658          vabits2 = get_vabits2( src+i );
1659          set_vabits2( dst+i, vabits2 );
1660          if (VA_BITS2_PARTDEFINED == vabits2) {
1661             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1662          }
1663          i++;
1664          len--;
1665       }
1666 
1667    } else {
1668 
1669       /* We have to do things the slow way */
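      /* Note on direction: when the ranges overlap, the copy order
         matters (the same reasoning as memmove).  If src < dst we copy
         top-down, so each source byte is read before the copy can
         overwrite it; if src > dst, bottom-up is safe for the same
         reason. */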
1670       if (src < dst) {
1671          for (i = 0, j = len-1; i < len; i++, j--) {
1672             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1673             vabits2 = get_vabits2( src+j );
1674             set_vabits2( dst+j, vabits2 );
1675             if (VA_BITS2_PARTDEFINED == vabits2) {
1676                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1677             }
1678          }
1679       }
1680 
1681       if (src > dst) {
1682          for (i = 0; i < len; i++) {
1683             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1684             vabits2 = get_vabits2( src+i );
1685             set_vabits2( dst+i, vabits2 );
1686             if (VA_BITS2_PARTDEFINED == vabits2) {
1687                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1688             }
1689          }
1690       }
1691    }
1692 
1693 }
1694 
1695 
1696 /*------------------------------------------------------------*/
1697 /*--- Origin tracking stuff - cache basics                 ---*/
1698 /*------------------------------------------------------------*/
1699 
1700 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1701    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1702 
1703    Note that this implementation draws inspiration from the "origin
1704    tracking by value piggybacking" scheme described in "Tracking Bad
1705    Apples: Reporting the Origin of Null and Undefined Value Errors"
1706    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1707    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1708    implemented completely differently.
1709 
1710    Origin tags and ECUs -- about the shadow values
1711    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1712 
1713    This implementation tracks the defining point of all uninitialised
1714    values using so called "origin tags", which are 32-bit integers,
1715    rather than using the values themselves to encode the origins.  The
1716    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1717    describes.
1718 
1719    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1720    ints (UInts), regardless of the machine's word size.  Each tag
1721    comprises an upper 30-bit ECU field and a lower 2-bit
1722    'kind' field.  The ECU field is a number given out by m_execontext
1723    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1724    directly as an origin tag (otag), but in fact we want to put
1725    additional information in the 'kind' field to indicate roughly where the
1726    tag came from.  This helps print more understandable error messages
1727    for the user -- it has no other purpose.  In summary:
1728 
1729    * Both ECUs and origin tags are represented as 32-bit words
1730 
1731    * m_execontext and the core-tool interface deal purely in ECUs.
1732      They have no knowledge of origin tags - that is a purely
1733      Memcheck-internal matter.
1734 
1735    * all valid ECUs have the lowest 2 bits zero and at least
1736      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1737 
1738    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1739      constants defined in mc_include.h.
1740 
1741    * to convert an otag back to an ECU, AND it with ~3
1742 
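   For example, a minimal sketch of the two conversions, using the
   MC_OKIND_ constants from mc_include.h:

      UInt otag = ecu | MC_OKIND_HEAP;  // tag the ECU as heap-derived
      UInt ecu2 = otag & ~3;            // recover the ECU; ecu2 == ecu
      UInt kind = otag & 3;             // here, kind == MC_OKIND_HEAP
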
1743    One important fact is that no valid otag is zero.  A zero otag is
1744    used by the implementation to indicate "no origin", which could
1745    mean that either the value is defined, or it is undefined but the
1746    implementation somehow managed to lose the origin.
1747 
1748    The ECU used for memory created by malloc etc is derived from the
1749    stack trace at the time the malloc etc happens.  This means the
1750    mechanism can show the exact allocation point for heap-created
1751    uninitialised values.
1752 
1753    In contrast, it is simply too expensive to create a complete
1754    backtrace for each stack allocation.  Therefore we merely use a
1755    depth-1 backtrace for stack allocations, which can be done once at
1756    translation time, rather than N times at run time.  The result of
1757    this is that, for stack created uninitialised values, Memcheck can
1758    only show the allocating function, and not what called it.
1759    Furthermore, compilers tend to move the stack pointer just once at
1760    the start of the function, to allocate all locals, and so in fact
1761    the stack origin almost always simply points to the opening brace
1762    of the function.  Net result is, for stack origins, the mechanism
1763    can tell you in which function the undefined value was created, but
1764    that's all.  Users will need to carefully check all locals in the
1765    specified function.
1766 
1767    Shadowing registers and memory
1768    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1769 
1770    Memory is shadowed using a two level cache structure (ocacheL1 and
1771    ocacheL2).  Memory references are first directed to ocacheL1.  This
1772    is a traditional 2-way set associative cache with 32-byte lines and
1773    approximate LRU replacement within each set.
1774 
1775    A naive implementation would require storing one 32 bit otag for
1776    each byte of memory covered, a 4:1 space overhead.  Instead, there
1777    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1778    that shows which of the 4 bytes have that shadow value and which
1779    have a shadow value of zero (indicating no origin).  Hence a lot of
1780    space is saved, but the cost is that only one different origin per
1781    4 bytes of address space can be represented.  This is a source of
1782    imprecision, but how much of a problem it really is remains to be
1783    seen.
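
   Concretely: for each aligned 32-bit word of address space there is
   one 32-bit otag slot plus a 4-bit mask.  Assuming bit k of the mask
   covers byte k (as in the OCacheLine handling further below), a mask
   of 0x3 with otag T means "bytes 0 and 1 have origin T; bytes 2 and
   3 have no origin".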
1784 
1785    A cache line that contains all zeroes ("no origins") contains no
1786    useful information, and can be ejected from the L1 cache "for
1787    free", in the sense that a read miss on the L1 causes a line of
1788    zeroes to be installed.  However, ejecting a line containing
1789    nonzeroes risks losing origin information permanently.  In order to
1790    prevent such lossage, ejected nonzero lines are placed in a
1791    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1792    lines.  This can grow arbitrarily large, and so should ensure that
1793    Memcheck runs out of memory in preference to losing useful origin
1794    info due to cache size limitations.
1795 
1796    Shadowing registers is a bit tricky, because the shadow values are
1797    32 bits, regardless of the size of the register.  That gives a
1798    problem for registers smaller than 32 bits.  The solution is to
1799    find spaces in the guest state that are unused, and use those to
1800    shadow guest state fragments smaller than 32 bits.  For example, on
1801    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
1802    shadow are allocated for the register's otag, then there are still
1803    12 bytes left over which could be used to shadow 3 other values.
1804 
1805    This implies there is some non-obvious mapping from guest state
1806    (start,length) pairs to the relevant shadow offset (for the origin
1807    tags).  And it is unfortunately guest-architecture specific.  The
1808    mapping is contained in mc_machine.c, which is quite lengthy but
1809    straightforward.
1810 
1811    Instrumenting the IR
1812    ~~~~~~~~~~~~~~~~~~~~
1813 
1814    Instrumentation is largely straightforward, and done by the
1815    functions schemeE and schemeS in mc_translate.c.  These generate
1816    code for handling the origin tags of expressions (E) and statements
1817    (S) respectively.  The rather strange names are a reference to the
1818    "compilation schemes" shown in Simon Peyton Jones' book "The
1819    Implementation of Functional Programming Languages" (Prentice Hall,
1820    1987, see
1821    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1822 
1823    schemeS merely arranges to move shadow values around the guest
1824    state to track the incoming IR.  schemeE is largely trivial too.
1825    The only significant point is how to compute the otag corresponding
1826    to binary (or ternary, quaternary, etc) operator applications.  The
1827    rule is simple: just take whichever value is larger (32-bit
1828    unsigned max).  Constants get the special value zero.  Hence this
1829    rule always propagates a nonzero (known) otag in preference to a
1830    zero (unknown, or more likely, value-is-defined) tag, as we want.
1831    If two different undefined values are inputs to a binary operator
1832    application, then which is propagated is arbitrary, but that
1833    doesn't matter, since the program is erroneous in using either of
1834    the values, and so there's no point in attempting to propagate
1835    both.
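
   For example (a sketch; the tag values are made up): for
   z = Add32(x,y) with otag(x) == 0 (x is defined) and
   otag(y) == 0x1004, schemeE computes
   otag(z) = Max32U(0, 0x1004) == 0x1004, so z inherits y's origin.
   Had both tags been nonzero, the numerically larger one would win,
   which is arbitrary but, per the above, harmless.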
1836 
1837    Since constants are abstracted to (otag) zero, much of the
1838    instrumentation code can be folded out without difficulty by the
1839    generic post-instrumentation IR cleanup pass, using these rules:
1840    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
1841    are constants is evaluated at JIT time; the resulting dead code is
1842    then removed.  In practice this causes surprisingly few Max32Us to
1843    survive through to backend code generation.
1844 
1845    Integration with the V-bits machinery
1846    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1847 
1848    This is again largely straightforward.  Mostly the otag and V bits
1849    stuff are independent.  The only point of interaction is when the V
1850    bits instrumenter creates a call to a helper function to report an
1851    uninitialised value error -- in that case it must first use schemeE
1852    to get hold of the origin tag expression for the value, and pass
1853    that to the helper too.
1854 
1855    There is the usual stuff to do with setting address range
1856    permissions.  When memory is painted undefined, we must also know
1857    the origin tag to paint with, which involves some tedious plumbing,
1858    particularly to do with the fast case stack handlers.  When memory
1859    is painted defined or noaccess then the origin tags must be forced
1860    to zero.
1861 
1862    One of the goals of the implementation was to ensure that the
1863    non-origin tracking mode isn't slowed down at all.  To do this,
1864    various functions to do with memory permissions setting (again,
1865    mostly pertaining to the stack) are duplicated for the with- and
1866    without-otag case.
1867 
1868    Dealing with stack redzones, and the NIA cache
1869    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1870 
1871    This is one of the few non-obvious parts of the implementation.
1872 
1873    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1874    reserved area below the stack pointer, that can be used as scratch
1875    space by compiler generated code for functions.  In the Memcheck
1876    sources this is referred to as the "stack redzone".  The important
1877    thing here is that such redzones are considered volatile across
1878    function calls and returns.  So Memcheck takes care to mark them as
1879    undefined for each call and return, on the afflicted platforms.
1880    Past experience shows this is essential in order to get reliable
1881    messages about uninitialised values that come from the stack.
1882 
1883    So the question is, when we paint a redzone undefined, what origin
1884    tag should we use for it?  Consider a function f() calling g().  If
1885    we paint the redzone using an otag derived from the ExeContext of
1886    the CALL/BL instruction in f, then any errors in g causing it to
1887    use uninitialised values that happen to lie in the redzone, will be
1888    reported as having their origin in f.  Which is highly confusing.
1889 
1890    The same applies for returns: if, on a return, we paint the redzone
1891    using an origin tag derived from the ExeContext of the RET/BLR
1892    instruction in g, then any later errors in f causing it to use
1893    uninitialised values in the redzone, will be reported as having
1894    their origin in g.  Which is just as confusing.
1895 
1896    To do it right, in both cases we need to use an origin tag which
1897    pertains to the instruction which dynamically follows the CALL/BL
1898    or RET/BLR.  In short, one derived from the NIA - the "next
1899    instruction address".
1900 
1901    To make this work, Memcheck's redzone-painting helper,
1902    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1903    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
1904    ExeContext's ECU as the basis for the otag used to paint the
1905    redzone.  The expensive part of this is converting an NIA into an
1906    ECU, since this happens once for every call and every return.  So
1907    we use a simple 511-line, 2-way set associative cache
1908    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1909    the cost out.
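
   In outline (a sketch only, not the exact code), the helper's
   NIA-to-otag step is:

      ecu = lookup nia in nia_to_ecu_cache;
      if (miss) {
         here = depth-1 ExeContext for nia;
         ecu  = VG_(get_ECU_from_ExeContext)(here);
         insert (nia, ecu) into nia_to_ecu_cache;
      }
      otag = ecu | MC_OKIND_STACK;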
1910 
1911    Further background comments
1912    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1913 
1914    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
1915    > it really just the address of the relevant ExeContext?
1916 
1917    Well, it's not the address, but a value which has a 1-1 mapping
1918    with ExeContexts, and is guaranteed not to be zero, since zero
1919    denotes (to memcheck) "unknown origin or defined value".  So these
1920    UInts are just numbers starting at 4 and incrementing by 4; each
1921    ExeContext is given a number when it is created.  (*** NOTE this
1922    confuses otags and ECUs; see comments above ***).
1923 
1924    Making these otags 32-bit regardless of the machine's word size
1925    makes the 64-bit implementation easier (next para).  And it doesn't
1926    really limit us in any way, since for the tags to overflow would
1927    require that the program somehow caused 2^30-1 different
1928    ExeContexts to be created, in which case it is probably in deep
1929    trouble.  Not to mention V will have soaked up many tens of
1930    gigabytes of memory merely to store them all.
1931 
1932    So having 64-bit origins doesn't really buy you anything, and has
1933    the following downsides:
1934 
1935    Suppose that instead, an otag is a UWord.  This would mean that, on
1936    a 64-bit target,
1937 
1938    1. It becomes hard to shadow any element of guest state which is
1939       smaller than 8 bytes.  To do so means you'd need to find some
1940       8-byte-sized hole in the guest state which you don't want to
1941       shadow, and use that instead to hold the otag.  On ppc64, the
1942       condition code register(s) are split into 20 UChar sized pieces,
1943       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
1944       and so that would entail finding 160 bytes somewhere else in the
1945       guest state.
1946 
1947       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
1948       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
1949       same) and so I had to look for 4 untracked otag-sized areas in
1950       the guest state to make that possible.
1951 
1952       The same problem exists of course when origin tags are only 32
1953       bits, but it's less extreme.
1954 
1955    2. (More compelling) it doubles the size of the origin shadow
1956       memory.  Given that the shadow memory is organised as a fixed
1957       size cache, and that accuracy of tracking is limited by origins
1958       falling out the cache due to space conflicts, this isn't good.
1959 
1960    > Another question: is the origin tracking perfect, or are there
1961    > cases where it fails to determine an origin?
1962 
1963    It is imperfect for at least the following reasons, and
1964    probably more:
1965 
1966    * Insufficient capacity in the origin cache.  When a line is
1967      evicted from the cache it is gone forever, and so subsequent
1968      queries for the line produce zero, indicating no origin
1969      information.  Interestingly, a line containing all zeroes can be
1970      evicted "free" from the cache, since it contains no useful
1971      information, so there is scope perhaps for some cleverer cache
1972      management schemes.  (*** NOTE, with the introduction of the
1973      second level origin tag cache, ocacheL2, this is no longer a
1974      problem. ***)
1975 
1976    * The origin cache only stores one otag per 32 bits of address
1977      space, plus 4 bits indicating which of the 4 bytes has that tag
1978      and which are considered defined.  The result is that if two
1979      undefined bytes in the same word are stored in memory, the first
1980      stored byte's origin will be lost and replaced by the origin for
1981      the second byte.
1982 
1983    * Nonzero origin tags for defined values.  Consider a binary
1984      operator application op(x,y).  Suppose y is undefined (and so has
1985      a valid nonzero origin tag), and x is defined, but erroneously
1986      has a nonzero origin tag (defined values should have tag zero).
1987      If the erroneous tag has a numeric value greater than y's tag,
1988      then the rule for propagating origin tags through binary
1989      operations, which is simply to take the unsigned max of the two
1990      tags, will erroneously propagate x's tag rather than y's.
1991 
1992    * Some obscure uses of x86/amd64 byte registers can cause lossage
1993      or confusion of origins.  %AH .. %DH are treated as different
1994      from, and unrelated to, their parent registers, %EAX .. %EDX.
1995      So some weird sequences like
1996 
1997         movb undefined-value, %AH
1998         movb defined-value, %AL
1999         .. use %AX or %EAX ..
2000 
2001      will cause the origin attributed to %AH to be ignored, since %AL,
2002      %AX, %EAX are treated as the same register, and %AH as a
2003      completely separate one.
2004 
2005    But having said all that, it actually seems to work fairly well in
2006    practice.
2007 */
2008 
2009 static UWord stats_ocacheL1_find           = 0;
2010 static UWord stats_ocacheL1_found_at_1     = 0;
2011 static UWord stats_ocacheL1_found_at_N     = 0;
2012 static UWord stats_ocacheL1_misses         = 0;
2013 static UWord stats_ocacheL1_lossage        = 0;
2014 static UWord stats_ocacheL1_movefwds       = 0;
2015 
2016 static UWord stats__ocacheL2_refs          = 0;
2017 static UWord stats__ocacheL2_misses        = 0;
2018 static UWord stats__ocacheL2_n_nodes_max   = 0;
2019 
2020 /* Cache of 32-bit values, one every 32 bits of address space */
2021 
2022 #define OC_BITS_PER_LINE 5
2023 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2024 
2025 static INLINE UWord oc_line_offset ( Addr a ) {
2026    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2027 }
2028 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2029    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2030 }
2031 
2032 #define OC_LINES_PER_SET 2
2033 
2034 #define OC_N_SET_BITS    20
2035 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2036 
2037 /* These settings give:
2038    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2039    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2040 */
2041 
2042 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2043 
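/* How an address decomposes under these settings (an illustrative
   example).  With OC_BITS_PER_LINE == 5 and OC_N_SET_BITS == 20, for
   a == 0x4321ABC7:
      tag     = a & ~0x1F                  == 0x4321ABC0  (line base)
      setno   = (a >> 5) & (OC_N_SETS-1)   == 0x90D5E     (2-line set)
      lineoff = (a >> 2) & 7               == 1           (word in line)
   This matches oc_line_offset() above and find_OCacheLine() below. */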
2044 
2045 typedef
2046    struct {
2047       Addr  tag;
2048       UInt  w32[OC_W32S_PER_LINE];
2049       UChar descr[OC_W32S_PER_LINE];
2050    }
2051    OCacheLine;
2052 
2053 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2054    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2055    and 'z' if all the represented tags are zero. */
2056 static UChar classify_OCacheLine ( OCacheLine* line )
2057 {
2058    UWord i;
2059    if (line->tag == 1/*invalid*/)
2060       return 'e'; /* EMPTY */
2061    tl_assert(is_valid_oc_tag(line->tag));
2062    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2063       tl_assert(0 == ((~0xF) & line->descr[i]));
2064       if (line->w32[i] > 0 && line->descr[i] > 0)
2065          return 'n'; /* NONZERO - contains useful info */
2066    }
2067    return 'z'; /* ZERO - no useful info */
2068 }
2069 
2070 typedef
2071    struct {
2072       OCacheLine line[OC_LINES_PER_SET];
2073    }
2074    OCacheSet;
2075 
2076 typedef
2077    struct {
2078       OCacheSet set[OC_N_SETS];
2079    }
2080    OCache;
2081 
2082 static OCache* ocacheL1 = NULL;
2083 static UWord   ocacheL1_event_ctr = 0;
2084 
2085 static void init_ocacheL2 ( void ); /* fwds */
2086 static void init_OCache ( void )
2087 {
2088    UWord line, set;
2089    tl_assert(MC_(clo_mc_level) >= 3);
2090    tl_assert(ocacheL1 == NULL);
2091    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2092    if (ocacheL1 == NULL) {
2093       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2094                                    sizeof(OCache) );
2095    }
2096    tl_assert(ocacheL1 != NULL);
2097    for (set = 0; set < OC_N_SETS; set++) {
2098       for (line = 0; line < OC_LINES_PER_SET; line++) {
2099          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2100       }
2101    }
2102    init_ocacheL2();
2103 }
2104 
2105 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2106 {
2107    OCacheLine tmp;
2108    stats_ocacheL1_movefwds++;
2109    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2110    tmp = set->line[lineno-1];
2111    set->line[lineno-1] = set->line[lineno];
2112    set->line[lineno] = tmp;
2113 }
2114 
2115 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2116    UWord i;
2117    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2118       line->w32[i] = 0; /* NO ORIGIN */
2119       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2120    }
2121    line->tag = tag;
2122 }
2123 
2124 //////////////////////////////////////////////////////////////
2125 //// OCache backing store
2126 
2127 static OSet* ocacheL2 = NULL;
2128 
2129 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2130    return VG_(malloc)(cc, szB);
2131 }
2132 static void ocacheL2_free ( void* v ) {
2133    VG_(free)( v );
2134 }
2135 
2136 /* Stats: # nodes currently in tree */
2137 static UWord stats__ocacheL2_n_nodes = 0;
2138 
2139 static void init_ocacheL2 ( void )
2140 {
2141    tl_assert(!ocacheL2);
2142    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2143    tl_assert(0 == offsetof(OCacheLine,tag));
2144    ocacheL2
2145       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2146                              NULL, /* fast cmp */
2147                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
2148    tl_assert(ocacheL2);
2149    stats__ocacheL2_n_nodes = 0;
2150 }
2151 
2152 /* Find line with the given tag in the tree, or NULL if not found. */
2153 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2154 {
2155    OCacheLine* line;
2156    tl_assert(is_valid_oc_tag(tag));
2157    stats__ocacheL2_refs++;
2158    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2159    return line;
2160 }
2161 
2162 /* Delete the line with the given tag from the tree, if it is present, and
2163    free up the associated memory. */
2164 static void ocacheL2_del_tag ( Addr tag )
2165 {
2166    OCacheLine* line;
2167    tl_assert(is_valid_oc_tag(tag));
2168    stats__ocacheL2_refs++;
2169    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2170    if (line) {
2171       VG_(OSetGen_FreeNode)(ocacheL2, line);
2172       tl_assert(stats__ocacheL2_n_nodes > 0);
2173       stats__ocacheL2_n_nodes--;
2174    }
2175 }
2176 
2177 /* Add a copy of the given line to the tree.  It must not already be
2178    present. */
2179 static void ocacheL2_add_line ( OCacheLine* line )
2180 {
2181    OCacheLine* copy;
2182    tl_assert(is_valid_oc_tag(line->tag));
2183    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2184    tl_assert(copy);
2185    *copy = *line;
2186    stats__ocacheL2_refs++;
2187    VG_(OSetGen_Insert)( ocacheL2, copy );
2188    stats__ocacheL2_n_nodes++;
2189    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2190       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2191 }
2192 
2193 ////
2194 //////////////////////////////////////////////////////////////
2195 
2196 __attribute__((noinline))
2197 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2198 {
2199    OCacheLine *victim, *inL2;
2200    UChar c;
2201    UWord line;
2202    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2203    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2204    UWord tag     = a & tagmask;
2205    tl_assert(setno >= 0 && setno < OC_N_SETS);
2206 
2207    /* we already tried line == 0; skip therefore. */
2208    for (line = 1; line < OC_LINES_PER_SET; line++) {
2209       if (ocacheL1->set[setno].line[line].tag == tag) {
2210          if (line == 1) {
2211             stats_ocacheL1_found_at_1++;
2212          } else {
2213             stats_ocacheL1_found_at_N++;
2214          }
2215          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2216                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2217             moveLineForwards( &ocacheL1->set[setno], line );
2218             line--;
2219          }
2220          return &ocacheL1->set[setno].line[line];
2221       }
2222    }
2223 
2224    /* A miss.  Use the last slot.  Implicitly this means we're
2225       ejecting the line in the last slot. */
2226    stats_ocacheL1_misses++;
2227    tl_assert(line == OC_LINES_PER_SET);
2228    line--;
2229    tl_assert(line > 0);
2230 
2231    /* First, move the to-be-ejected line to the L2 cache. */
2232    victim = &ocacheL1->set[setno].line[line];
2233    c = classify_OCacheLine(victim);
2234    switch (c) {
2235       case 'e':
2236          /* the line is empty (has invalid tag); ignore it. */
2237          break;
2238       case 'z':
2239          /* line contains zeroes.  We must ensure the backing store is
2240             updated accordingly, either by copying the line there
2241             verbatim, or by ensuring it isn't present there.  We
2242             choose the latter on the basis that it reduces the size of
2243             the backing store. */
2244          ocacheL2_del_tag( victim->tag );
2245          break;
2246       case 'n':
2247          /* line contains at least one real, useful origin.  Copy it
2248             to the backing store. */
2249          stats_ocacheL1_lossage++;
2250          inL2 = ocacheL2_find_tag( victim->tag );
2251          if (inL2) {
2252             *inL2 = *victim;
2253          } else {
2254             ocacheL2_add_line( victim );
2255          }
2256          break;
2257       default:
2258          tl_assert(0);
2259    }
2260 
2261    /* Now we must reload the L1 cache from the backing tree, if
2262       possible. */
2263    tl_assert(tag != victim->tag); /* stay sane */
2264    inL2 = ocacheL2_find_tag( tag );
2265    if (inL2) {
2266       /* We're in luck.  It's in the L2. */
2267       ocacheL1->set[setno].line[line] = *inL2;
2268    } else {
2269       /* Missed at both levels of the cache hierarchy.  We have to
2270          declare it as full of zeroes (unknown origins). */
2271       stats__ocacheL2_misses++;
2272       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2273    }
2274 
2275    /* Move it one forwards */
2276    moveLineForwards( &ocacheL1->set[setno], line );
2277    line--;
2278 
2279    return &ocacheL1->set[setno].line[line];
2280 }
2281 
2282 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2283 {
2284    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2285    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2286    UWord tag     = a & tagmask;
2287 
2288    stats_ocacheL1_find++;
2289 
2290    if (OC_ENABLE_ASSERTIONS) {
2291       tl_assert(setno >= 0 && setno < OC_N_SETS);
2292       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2293    }
2294 
2295    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2296       return &ocacheL1->set[setno].line[0];
2297    }
2298 
2299    return find_OCacheLine_SLOW( a );
2300 }
2301 
2302 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2303 {
2304    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2305    //// Set the origins for a+0 .. a+7
2306    { OCacheLine* line;
2307      UWord lineoff = oc_line_offset(a);
2308      if (OC_ENABLE_ASSERTIONS) {
2309         tl_assert(lineoff >= 0
2310                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2311      }
2312      line = find_OCacheLine( a );
2313      line->descr[lineoff+0] = 0xF;
2314      line->descr[lineoff+1] = 0xF;
2315      line->w32[lineoff+0]   = otag;
2316      line->w32[lineoff+1]   = otag;
2317    }
2318    //// END inlined, specialised version of MC_(helperc_b_store8)
2319 }
2320 
2321 
2322 /*------------------------------------------------------------*/
2323 /*--- Aligned fast case permission setters,                ---*/
2324 /*--- for dealing with stacks                              ---*/
2325 /*------------------------------------------------------------*/
2326 
2327 /*--------------------- 32-bit ---------------------*/
2328 
2329 /* Nb: by "aligned" here we mean 4-byte aligned */
2330 
2331 static INLINE void make_aligned_word32_undefined ( Addr a )
2332 {
2333    PROF_EVENT(300, "make_aligned_word32_undefined");
2334 
2335 #ifndef PERF_FAST_STACK2
2336    make_mem_undefined(a, 4);
2337 #else
2338    {
2339       UWord   sm_off;
2340       SecMap* sm;
2341 
2342       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2343          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2344          make_mem_undefined(a, 4);
2345          return;
2346       }
2347 
2348       sm                  = get_secmap_for_writing_low(a);
2349       sm_off              = SM_OFF(a);
2350       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2351    }
2352 #endif
2353 }
2354 
2355 static INLINE
2356 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2357 {
2358    make_aligned_word32_undefined(a);
2359    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2360    //// Set the origins for a+0 .. a+3
2361    { OCacheLine* line;
2362      UWord lineoff = oc_line_offset(a);
2363      if (OC_ENABLE_ASSERTIONS) {
2364         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2365      }
2366      line = find_OCacheLine( a );
2367      line->descr[lineoff] = 0xF;
2368      line->w32[lineoff]   = otag;
2369    }
2370    //// END inlined, specialised version of MC_(helperc_b_store4)
2371 }
2372 
2373 static INLINE
2374 void make_aligned_word32_noaccess ( Addr a )
2375 {
2376    PROF_EVENT(310, "make_aligned_word32_noaccess");
2377 
2378 #ifndef PERF_FAST_STACK2
2379    MC_(make_mem_noaccess)(a, 4);
2380 #else
2381    {
2382       UWord   sm_off;
2383       SecMap* sm;
2384 
2385       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2386          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2387          MC_(make_mem_noaccess)(a, 4);
2388          return;
2389       }
2390 
2391       sm                  = get_secmap_for_writing_low(a);
2392       sm_off              = SM_OFF(a);
2393       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2394 
2395       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2396       //// Set the origins for a+0 .. a+3.
2397       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2398          OCacheLine* line;
2399          UWord lineoff = oc_line_offset(a);
2400          if (OC_ENABLE_ASSERTIONS) {
2401             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2402          }
2403          line = find_OCacheLine( a );
2404          line->descr[lineoff] = 0;
2405       }
2406       //// END inlined, specialised version of MC_(helperc_b_store4)
2407    }
2408 #endif
2409 }
2410 
2411 /*--------------------- 64-bit ---------------------*/
2412 
2413 /* Nb: by "aligned" here we mean 8-byte aligned */
2414 
2415 static INLINE void make_aligned_word64_undefined ( Addr a )
2416 {
2417    PROF_EVENT(320, "make_aligned_word64_undefined");
2418 
2419 #ifndef PERF_FAST_STACK2
2420    make_mem_undefined(a, 8);
2421 #else
2422    {
2423       UWord   sm_off16;
2424       SecMap* sm;
2425 
2426       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2427          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2428          make_mem_undefined(a, 8);
2429          return;
2430       }
2431 
2432       sm       = get_secmap_for_writing_low(a);
2433       sm_off16 = SM_OFF_16(a);
2434       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2435    }
2436 #endif
2437 }
2438 
2439 static INLINE
2440 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2441 {
2442    make_aligned_word64_undefined(a);
2443    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2444    //// Set the origins for a+0 .. a+7
2445    { OCacheLine* line;
2446      UWord lineoff = oc_line_offset(a);
2447      tl_assert(lineoff >= 0
2448                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2449      line = find_OCacheLine( a );
2450      line->descr[lineoff+0] = 0xF;
2451      line->descr[lineoff+1] = 0xF;
2452      line->w32[lineoff+0]   = otag;
2453      line->w32[lineoff+1]   = otag;
2454    }
2455    //// END inlined, specialised version of MC_(helperc_b_store8)
2456 }
2457 
2458 static INLINE
2459 void make_aligned_word64_noaccess ( Addr a )
2460 {
2461    PROF_EVENT(330, "make_aligned_word64_noaccess");
2462 
2463 #ifndef PERF_FAST_STACK2
2464    MC_(make_mem_noaccess)(a, 8);
2465 #else
2466    {
2467       UWord   sm_off16;
2468       SecMap* sm;
2469 
2470       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2471          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2472          MC_(make_mem_noaccess)(a, 8);
2473          return;
2474       }
2475 
2476       sm       = get_secmap_for_writing_low(a);
2477       sm_off16 = SM_OFF_16(a);
2478       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2479 
2480       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2481       //// Clear the origins for a+0 .. a+7.
2482       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2483          OCacheLine* line;
2484          UWord lineoff = oc_line_offset(a);
2485          tl_assert(lineoff >= 0
2486                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2487          line = find_OCacheLine( a );
2488          line->descr[lineoff+0] = 0;
2489          line->descr[lineoff+1] = 0;
2490       }
2491       //// END inlined, specialised version of MC_(helperc_b_store8)
2492    }
2493 #endif
2494 }
2495 
2496 
2497 /*------------------------------------------------------------*/
2498 /*--- Stack pointer adjustment                             ---*/
2499 /*------------------------------------------------------------*/
2500 
2501 #ifdef PERF_FAST_STACK
2502 #  define MAYBE_USED
2503 #else
2504 #  define MAYBE_USED __attribute__((unused))
2505 #endif
2506 
2507 /*--------------- adjustment by 4 bytes ---------------*/
2508 
2509 MAYBE_USED
2510 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2511 {
2512    UInt otag = ecu | MC_OKIND_STACK;
2513    PROF_EVENT(110, "new_mem_stack_4");
2514    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2515       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2516    } else {
2517       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2518    }
2519 }
2520 
2521 MAYBE_USED
2522 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2523 {
2524    PROF_EVENT(110, "new_mem_stack_4");
2525    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2526       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2527    } else {
2528       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2529    }
2530 }
2531 
2532 MAYBE_USED
2533 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2534 {
2535    PROF_EVENT(120, "die_mem_stack_4");
2536    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2537       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2538    } else {
2539       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2540    }
2541 }
2542 
2543 /*--------------- adjustment by 8 bytes ---------------*/
2544 
2545 MAYBE_USED
2546 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2547 {
2548    UInt otag = ecu | MC_OKIND_STACK;
2549    PROF_EVENT(111, "new_mem_stack_8");
2550    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2551       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2552    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2553       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2554       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2555    } else {
2556       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2557    }
2558 }
2559 
2560 MAYBE_USED
2561 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2562 {
2563    PROF_EVENT(111, "new_mem_stack_8");
2564    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2565       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2566    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2567       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2568       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2569    } else {
2570       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2571    }
2572 }
2573 
2574 MAYBE_USED
2575 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2576 {
2577    PROF_EVENT(121, "die_mem_stack_8");
2578    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2579       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2580    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2581       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2582       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2583    } else {
2584       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2585    }
2586 }
2587 
2588 /*--------------- adjustment by 12 bytes ---------------*/
2589 
2590 MAYBE_USED
2591 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2592 {
2593    UInt otag = ecu | MC_OKIND_STACK;
2594    PROF_EVENT(112, "new_mem_stack_12");
2595    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2596       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2597       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2598    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2599       /* from previous test we don't have 8-alignment at offset +0,
2600          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2601          do 4 at +0 and then 8 at +4. */
2602       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2603       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2604    } else {
2605       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2606    }
2607 }
2608 
2609 MAYBE_USED
2610 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2611 {
2612    PROF_EVENT(112, "new_mem_stack_12");
2613    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2614       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2615       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2616    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2617       /* from previous test we don't have 8-alignment at offset +0,
2618          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2619          do 4 at +0 and then 8 at +4. */
2620       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2621       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2622    } else {
2623       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2624    }
2625 }
2626 
2627 MAYBE_USED
2628 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2629 {
2630    PROF_EVENT(122, "die_mem_stack_12");
2631    /* Note the -12 in the test */
2632    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2633       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2634          -4. */
2635       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2636       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2637    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2638       /* We have 4-alignment at +0, but we don't have 8-alignment at
2639          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2640          and then 8 at -8. */
2641       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2642       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2643    } else {
2644       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2645    }
2646 }
2647 
2648 /*--------------- adjustment by 16 bytes ---------------*/
2649 
2650 MAYBE_USED
2651 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2652 {
2653    UInt otag = ecu | MC_OKIND_STACK;
2654    PROF_EVENT(113, "new_mem_stack_16");
2655    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2656       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2657       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2658       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2659    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2660       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2661          Hence do 4 at +0, 8 at +4, 4 at +12. */
2662       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2663       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2664       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2665    } else {
2666       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2667    }
2668 }
2669 
2670 MAYBE_USED
2671 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2672 {
2673    PROF_EVENT(113, "new_mem_stack_16");
2674    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2675       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2676       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2677       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2678    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2679       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2680          Hence do 4 at +0, 8 at +4, 4 at +12. */
2681       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2682       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2683       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2684    } else {
2685       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2686    }
2687 }
2688 
2689 MAYBE_USED
2690 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2691 {
2692    PROF_EVENT(123, "die_mem_stack_16");
2693    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2694       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2695       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2696       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2697    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2698       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2699       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2700       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2701       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2702    } else {
2703       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2704    }
2705 }
2706 
/*--------------- adjustment by 32 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(114, "new_mem_stack_32");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0,+28. */
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
      make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(114, "new_mem_stack_32");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
         +0,+28. */
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
      make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
{
   PROF_EVENT(124, "die_mem_stack_32");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Straightforward */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
         4 at -32,-4. */
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
      make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   }
}

/*--------------- adjustment by 112 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(115, "new_mem_stack_112");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(115, "new_mem_stack_112");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
{
   PROF_EVENT(125, "die_mem_stack_112");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   }
}

/*--------------- adjustment by 128 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(116, "new_mem_stack_128");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(116, "new_mem_stack_128");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
{
   PROF_EVENT(126, "die_mem_stack_128");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   }
}

/*--------------- adjustment by 144 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(117, "new_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(117, "new_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
{
   PROF_EVENT(127, "die_mem_stack_144");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   }
}

/*--------------- adjustment by 160 bytes ---------------*/

MAYBE_USED
static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(118, "new_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
      make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   } else {
      MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(118, "new_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
      make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   } else {
      make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   }
}

MAYBE_USED
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(128, "die_mem_stack_160");
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   } else {
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   }
}

/*--------------- adjustment by N bytes ---------------*/

static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   UInt otag = ecu | MC_OKIND_STACK;
   PROF_EVENT(115, "new_mem_stack_w_otag");
   MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
}

static void mc_new_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(115, "new_mem_stack");
   make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
}

static void mc_die_mem_stack ( Addr a, SizeT len )
{
   PROF_EVENT(125, "die_mem_stack");
   MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}


/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    red zone [sic]."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this:  we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits, and so any errors from reading bits of
   the red zone it didn't write will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
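
/* An illustrative fragment (pseudo-assembly in a comment; not real
   code) of the first scenario above:

      f:  ...
          call g             ; g spills over [rsp-16 .. rsp-1],
                             ; leaving those bytes marked defined
          mov  rax, [rsp-8]  ; f never wrote here; unless the redzone
                             ; was made undefined again when g
                             ; returned, the load inherits g's defined
                             ; V bits and the bug goes unreported
*/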


/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;
static UWord stats__nia_cache_misses  = 0;

typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];

static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}

static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord i;
   UInt        ecu;
   ExeContext* ec;

   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
#     undef SWAP
      return nia_to_ecu_cache[i].ecu0;
   }

   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
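
/* Operationally, the cache above is a direct-mapped table of
   N_NIA_TO_ECU_CACHE sets, each holding two (nia, ecu) pairs with a
   move-to-front policy: a hit on the second entry swaps it to the
   front, and a miss demotes the front entry before installing the
   new pair, so the less recently used of the two is evicted.  The
   table size is odd (511 rather than 512), presumably so that
   regularly spaced nias don't all land in the same few sets. */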


/* Note that this serves both the origin-tracking and
   no-origin-tracking modes.  We assume that calls to it are
   sufficiently infrequent that it isn't worth specialising for the
   with/without origin-tracking cases. */
void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
{
   UInt otag;
   tl_assert(sizeof(UWord) == sizeof(SizeT));
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
      UInt ecu = convert_nia_to_ecu ( nia );
      tl_assert(VG_(is_plausible_ECU)(ecu));
      otag = ecu | MC_OKIND_STACK;
   } else {
      tl_assert(nia == 0);
      otag = 0;
   }

#  if 0
   /* Really slow version */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
#  endif

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);

      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);

      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);

      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely within a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */

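   /* Each byte of client memory is described by 2 vabits, so one
      16-bit store of VA_BITS16_UNDEFINED covers 8 client bytes: the
      128-byte case needs 16 UShort stores and the 288-byte ppc64
      case below needs 36. */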
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         // Now we know the entire range is within the main primary map.
         SecMap* sm    = get_secmap_for_writing_low(a_lo);
         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
         /* Now we know that the entire address range falls within a
            single secondary map, and that that secondary 'lives' in
            the main primary map. */
         if (LIKELY(sm == sm_hi)) {
            // Finally, we know that the range is entirely within one secmap.
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            }
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         // Now we know the entire range is within the main primary map.
         SecMap* sm    = get_secmap_for_writing_low(a_lo);
         SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
         /* Now we know that the entire address range falls within a
            single secondary map, and that that secondary 'lives' in
            the main primary map. */
         if (LIKELY(sm == sm_hi)) {
            // Finally, we know that the range is entirely within one secmap.
            UWord   v_off = SM_OFF(a_lo);
            UShort* p     = (UShort*)(&sm->vabits8[v_off]);
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
               set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
               set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            }
            return;
         }
      }
   }

   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}


/*------------------------------------------------------------*/
/*--- Checking memory                                      ---*/
/*------------------------------------------------------------*/

typedef
   enum {
      MC_Ok = 5,
      MC_AddrErr = 6,
      MC_ValueErr = 7
   }
   MC_ReadResult;


/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressable.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(60, "check_mem_is_noaccess");
   for (i = 0; i < len; i++) {
      PROF_EVENT(61, "check_mem_is_noaccess(loop)");
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(62, "is_mem_addressable");
   for (i = 0; i < len; i++) {
      PROF_EVENT(63, "is_mem_addressable(loop)");
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}

static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(64, "is_mem_defined");
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(65, "is_mem_defined(loop)");
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}


/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessibility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;

   PROF_EVENT(64, "is_mem_defined"); // fixme
   DEBUG("is_mem_defined_comprehensive\n");

   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}
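
/* An illustrative client-side sketch (invented names; not compiled)
   of the scenario described above:

      struct S { char c; int i; };           // 3-byte hole after 'c'
      struct S* s = malloc(sizeof(struct S));
      s->c = 'x'; s->i = 42;                 // the hole stays undefined
      write(fd, s, sizeof(struct S) + 4);    // checked range strays
                                             // past the heap block

   Plain is_mem_defined() stops at the first non-defined byte (the
   hole) and so reports only a definedness error; this version keeps
   scanning and can additionally report that the final four bytes are
   unaddressable. */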


/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */

static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(66, "mc_is_defined_asciiz");
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error!  Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}


/*------------------------------------------------------------*/
/*--- Memory event handlers                                ---*/
/*------------------------------------------------------------*/

static
void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr;
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}

static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   Char* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}

/* Handling of mmap and mprotect is not as simple as it seems.

   The underlying semantics are that memory obtained from mmap is
   always initialised, but may be inaccessible.  And changes to the
   protection of memory do not change its contents and hence not its
   definedness state.  The problem is that we can't model an
   inaccessible-but-with-some-definedness state; once we mark memory
   as inaccessible we lose all info about definedness, and so can't
   restore that if it is later made accessible again.

   One obvious thing to do is this:

      mmap/mprotect NONE  -> noaccess
      mmap/mprotect other -> defined

   The problem case here is: taking accessible memory, writing
   uninitialised data to it, mprotecting it NONE and later mprotecting
   it back to some accessible state causes the undefinedness to be
   lost.

   A better proposal is:

     (1) mmap NONE       ->  make noaccess
     (2) mmap other      ->  make defined

     (3) mprotect NONE   ->  # no change
     (4) mprotect other  ->  change any "noaccess" to "defined"

   (2) is OK because memory newly obtained from mmap really is defined
       (zeroed out by the kernel -- doing anything else would
       constitute a massive security hole).

   (1) is OK because the only way to make the memory usable is via
       (4), in which case we also wind up correctly marking it all as
       defined.

   (3) is the weak case.  We choose not to change the memory state
       (presumably the range is in some mixture of "defined" and
       "undefined", viz, accessible but with arbitrary V bits).  Doing
       nothing means we retain the V bits, so that if the memory is
       later mprotected "other", the V bits remain unchanged, so there
       can be no false negatives.  The bad effect is that if there's
       an access in the area, then MC cannot warn; but at least we'll
       get a SEGV to show, so it's better than nothing.

   Consider the sequence (3) followed by (4).  Any memory that was
   "defined" or "undefined" previously retains its state (as
   required).  Any memory that was "noaccess" before can only have
   been made that way by (1), and so it's OK to change it to
   "defined".

   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
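
/* An illustrative client-side sequence (a sketch with invented
   helpers; not tool code) showing why (3) must leave the V bits
   alone:

      int* p = mmap(0, 4096, PROT_READ|PROT_WRITE,
                    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); // (2): defined
      p[0] = some_uninitialised_int();    // p[0] becomes undefined
      mprotect(p, 4096, PROT_NONE);       // (3): V bits retained
      mprotect(p, 4096, PROT_READ);       // (4): only noaccess->defined
      consume(p[0]);                      // still reported as undefined

   Had (3) marked the page noaccess, (4) would have turned p[0]
   "defined" and the use would go unreported. */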
3789 static
mc_new_mem_mmap(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)3790 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3791                        ULong di_handle )
3792 {
3793    if (rr || ww || xx) {
3794       /* (2) mmap/mprotect other -> defined */
3795       MC_(make_mem_defined)(a, len);
3796    } else {
3797       /* (1) mmap/mprotect NONE  -> noaccess */
3798       MC_(make_mem_noaccess)(a, len);
3799    }
3800 }
3801 
3802 static
mc_new_mem_mprotect(Addr a,SizeT len,Bool rr,Bool ww,Bool xx)3803 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3804 {
3805    if (rr || ww || xx) {
3806       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
3807       make_mem_defined_if_noaccess(a, len);
3808    } else {
3809       /* (3) mprotect NONE   ->  # no change */
3810       /* do nothing */
3811    }
3812 }


static
void mc_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // Because code is defined, initialised variables get put in the data
   // segment and are defined, and uninitialised variables get put in the
   // bss segment and are auto-zeroed (and so defined).
   //
   // It's possible that there will be padding between global variables.
   // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   // a program uses it, Memcheck will not complain.  This is arguably a
   // false negative, but it's a grey area -- the behaviour is defined (the
   // padding is zeroed) but it's probably not what the user intended.  And
   // we can't avoid it.
   //
   // Note: we generally ignore RWX permissions, because we can't track them
   // without requiring more than one A bit which would slow things down a
   // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   // So we mark any such pages as "unaddressable".
   DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
         a, (ULong)len, rr, ww, xx);
   mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
}

static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
   MC_(make_mem_defined)(a, len);
}


/*------------------------------------------------------------*/
/*--- Register event handlers                              ---*/
/*------------------------------------------------------------*/

/* Try and get a nonzero origin for the guest state section of thread
   tid characterised by (offset,size).  Return 0 if nothing to show
   for it. */
static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
                                             Int offset, SizeT size )
{
   Int   sh2off;
   UChar area[6];
   UInt  otag;
   sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   if (sh2off == -1)
      return 0;  /* This piece of guest state is not tracked */
   tl_assert(sh2off >= 0);
   tl_assert(0 == (sh2off % 4));
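   /* area[0] and area[5] act as canary bytes: the 4-byte otag is read
      into area[1..4] below, and the asserts that follow check that the
      copy stayed within bounds. */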
   area[0] = 0x31;
   area[5] = 0x27;
   VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
   tl_assert(area[0] == 0x31);
   tl_assert(area[5] == 0x27);
   otag = *(UInt*)&area[1];
   return otag;
}


/* When some chunk of guest state is written, mark the corresponding
   shadow area as valid.  This is used to initialise arbitrarily large
   chunks of guest state, hence the _SIZE value, which has to be as
   big as the biggest guest state.
*/
static void mc_post_reg_write ( CorePart part, ThreadId tid,
                                PtrdiffT offset, SizeT size)
{
#  define MAX_REG_WRITE_SIZE 1664
   UChar area[MAX_REG_WRITE_SIZE];
   tl_assert(size <= MAX_REG_WRITE_SIZE);
   VG_(memset)(area, V_BITS8_DEFINED, size);
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
#  undef MAX_REG_WRITE_SIZE
}

static
void mc_post_reg_write_clientcall ( ThreadId tid,
                                    PtrdiffT offset, SizeT size, Addr f)
{
   mc_post_reg_write(/*dummy*/0, tid, offset, size);
}

/* Look at the definedness of the guest's shadow state for
   [offset, offset+size).  If any part of that is undefined, record
   a parameter error.
*/
static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
                              PtrdiffT offset, SizeT size)
{
   Int   i;
   Bool  bad;
   UInt  otag;

   UChar area[16];
   tl_assert(size <= 16);

   VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );

   bad = False;
   for (i = 0; i < size; i++) {
      if (area[i] != V_BITS8_DEFINED) {
         bad = True;
         break;
      }
   }

   if (!bad)
      return;

   /* We've found some undefinedness.  See if we can also find an
      origin for it. */
   otag = mb_get_origin_for_guest_offset( tid, offset, size );
   MC_(record_regparam_error) ( tid, s, otag );
}


/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Load/store handlers.                                 ---*/
/*------------------------------------------------------------*/

/* Types:  LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/

/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))

/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above 32G:

   N_PRIMARY_BITS          == 19, so
   N_PRIMARY_MAP           == 0x80000, so
   N_PRIMARY_MAP-1         == 0x7FFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
           = ~ ( 0xFFFF | 0x7FFFF'0000 )
           = ~ 0x7FFFF'FFFF
           = 0xFFFF'FFF8'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
           = ~ ( 0xFFFE | 0x7FFFF'0000 )
           = ~ 0x7FFFF'FFFE
           = 0xFFFF'FFF8'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
           = ~ ( 0xFFFC | 0x7FFFF'0000 )
           = ~ 0x7FFFF'FFFC
           = 0xFFFF'FFF8'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
           = ~ ( 0xFFF8 | 0x7FFFF'0000 )
           = ~ 0x7FFFF'FFF8
           = 0xFFFF'FFF8'0000'0007
*/
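
/* A sketch (illustrative, not the tool's actual startup code) of the
   assertions the comment above refers to, for a 64-bit build with
   N_PRIMARY_BITS == 19:

      tl_assert( MASK(1) == 0xFFFFFFF800000000ULL );
      tl_assert( MASK(2) == 0xFFFFFFF800000001ULL );
      tl_assert( MASK(4) == 0xFFFFFFF800000003ULL );
      tl_assert( MASK(8) == 0xFFFFFFF800000007ULL );
*/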


/* ------------------------ Size = 8 ------------------------ */

static INLINE
ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(200, "mc_LOADV64");

#ifndef PERF_FAST_LOADV
   return mc_LOADVn_slow( a, 64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(201, "mc_LOADV64-slow1");
         return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];

      // Handle common case quickly: a is suitably aligned, is mapped, and
      // addressable.
      // Convert V bits from compact memory form to expanded register form.
      if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
         return V_BITS64_DEFINED;
      } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
         return V_BITS64_UNDEFINED;
      } else {
         /* Slow case: the 8 bytes are not all-defined or all-undefined. */
         PROF_EVENT(202, "mc_LOADV64-slow2");
         return mc_LOADVn_slow( a, 64, isBigEndian );
      }
   }
#endif
}

VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
{
   return mc_LOADV64(a, True);
}
VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
{
   return mc_LOADV64(a, False);
}


static INLINE
void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
{
   PROF_EVENT(210, "mc_STOREV64");

#ifndef PERF_FAST_STOREV
   // XXX: this slow case seems to be marginally faster than the fast case!
   // Investigate further.
   mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
#else
   {
      UWord   sm_off16, vabits16;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
         PROF_EVENT(211, "mc_STOREV64-slow1");
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         return;
      }

      sm       = get_secmap_for_reading_low(a);
      sm_off16 = SM_OFF_16(a);
      vabits16 = ((UShort*)(sm->vabits8))[sm_off16];

      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS16_DEFINED   == vabits16 ||
                           VA_BITS16_UNDEFINED == vabits16) ))
      {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (V_BITS64_DEFINED == vbits64) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
         } else if (V_BITS64_UNDEFINED == vbits64) {
            ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
         } else {
            /* Slow but general case -- writing partially defined bytes. */
            PROF_EVENT(212, "mc_STOREV64-slow2");
            mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
         }
      } else {
         /* Slow but general case. */
         PROF_EVENT(213, "mc_STOREV64-slow3");
         mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
      }
   }
#endif
}

VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, True);
}
VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
{
   mc_STOREV64(a, vbits64, False);
}


/* ------------------------ Size = 4 ------------------------ */

static INLINE
UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(220, "mc_LOADV32");

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(221, "mc_LOADV32-slow1");
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Handle common case quickly: a is suitably aligned, is mapped, and the
      // entire word32 it lives in is addressable.
      // Convert V bits from compact memory form to expanded register form.
      // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
      // Almost certainly not necessary, but be paranoid.
      if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      } else {
         /* Slow case: the 4 bytes are not all-defined or all-undefined. */
         PROF_EVENT(222, "mc_LOADV32-slow2");
         return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
      }
   }
#endif
}

VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
{
   return mc_LOADV32(a, True);
}
VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
{
   return mc_LOADV32(a, False);
}


static INLINE
void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
{
   PROF_EVENT(230, "mc_STOREV32");

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
         PROF_EVENT(231, "mc_STOREV32-slow1");
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];

      // Cleverness:  sometimes we don't have to write the shadow memory at
      // all, if we can tell that what we want to write is the same as what is
      // already there.  The 64/16/8 bit cases also have cleverness at this
      // point, but it works a little differently to the code below.
      if (V_BITS32_DEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_DEFINED) {
            return;
         } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
         } else {
            // not defined/undefined, or distinguished and changing state
            PROF_EVENT(232, "mc_STOREV32-slow2");
            mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         }
      } else if (V_BITS32_UNDEFINED == vbits32) {
         if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
            return;
         } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
            sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
         } else {
            // not defined/undefined, or distinguished and changing state
            PROF_EVENT(233, "mc_STOREV32-slow3");
            mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
         }
      } else {
         // Partially defined word
         PROF_EVENT(234, "mc_STOREV32-slow4");
         mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
      }
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
{
   mc_STOREV32(a, vbits32, False);
}


/* ------------------------ Size = 2 ------------------------ */

static INLINE
UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
{
   PROF_EVENT(240, "mc_LOADV16");

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(241, "mc_LOADV16-slow1");
         return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Handle common case quickly: a is suitably aligned, is mapped, and is
      // addressable.
      // Convert V bits from compact memory form to expanded register form
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the two sub-bytes.
         UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
         if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
         else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
         else {
            /* Slow case: the two bytes are not all-defined or all-undefined. */
            PROF_EVENT(242, "mc_LOADV16-slow2");
            return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
         }
      }
   }
#endif
}

VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
{
   return mc_LOADV16(a, True);
}
VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
{
   return mc_LOADV16(a, False);
}


static INLINE
void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
{
   PROF_EVENT(250, "mc_STOREV16");

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
         PROF_EVENT(251, "mc_STOREV16-slow1");
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      if (LIKELY( !is_distinguished_sm(sm) &&
                          (VA_BITS8_DEFINED   == vabits8 ||
                           VA_BITS8_UNDEFINED == vabits8) ))
      {
         /* Handle common case quickly: a is suitably aligned, */
         /* is mapped, and is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (V_BITS16_DEFINED == vbits16) {
            insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
                                         &(sm->vabits8[sm_off]) );
         } else if (V_BITS16_UNDEFINED == vbits16) {
            insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
                                         &(sm->vabits8[sm_off]) );
         } else {
            /* Slow but general case -- writing partially defined bytes. */
            PROF_EVENT(252, "mc_STOREV16-slow2");
            mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
         }
      } else {
         /* Slow but general case. */
         PROF_EVENT(253, "mc_STOREV16-slow3");
         mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
      }
   }
#endif
}

VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, True);
}
VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
{
   mc_STOREV16(a, vbits16, False);
}


/* ------------------------ Size = 1 ------------------------ */
/* Note: endianness is irrelevant for size == 1 */

VG_REGPARM(1)
UWord MC_(helperc_LOADV8) ( Addr a )
{
   PROF_EVENT(260, "mc_LOADV8");

#ifndef PERF_FAST_LOADV
   return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(261, "mc_LOADV8-slow1");
         return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      // Convert V bits from compact memory form to expanded register form
      // Handle common case quickly: a is mapped, and the entire
      // word32 it lives in is addressable.
      if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
      else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
      else {
         // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
         // the single byte.
         UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
         if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
         else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
         else {
            /* Slow case: the byte is not all-defined or all-undefined. */
            PROF_EVENT(262, "mc_LOADV8-slow2");
            return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
         }
      }
   }
#endif
}


VG_REGPARM(2)
void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
{
   PROF_EVENT(270, "mc_STOREV8");

#ifndef PERF_FAST_STOREV
   mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
#else
   {
      UWord   sm_off, vabits8;
      SecMap* sm;

      if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
         PROF_EVENT(271, "mc_STOREV8-slow1");
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         return;
      }

      sm      = get_secmap_for_reading_low(a);
      sm_off  = SM_OFF(a);
      vabits8 = sm->vabits8[sm_off];
      if (LIKELY
            ( !is_distinguished_sm(sm) &&
              ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
             || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
              )
            )
         )
      {
         /* Handle common case quickly: a is mapped, the entire word32 it
            lives in is addressable. */
         // Convert full V-bits in register to compact 2-bit form.
         if (V_BITS8_DEFINED == vbits8) {
            insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
                                          &(sm->vabits8[sm_off]) );
         } else if (V_BITS8_UNDEFINED == vbits8) {
            insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
                                          &(sm->vabits8[sm_off]) );
         } else {
            /* Slow but general case -- writing partially defined bytes. */
            PROF_EVENT(272, "mc_STOREV8-slow2");
            mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
         }
      } else {
         /* Slow but general case. */
         PROF_EVENT(273, "mc_STOREV8-slow3");
         mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
      }
   }
#endif
}


/*------------------------------------------------------------*/
/*--- Functions called directly from generated code:       ---*/
/*--- Value-check failure handlers.                        ---*/
/*------------------------------------------------------------*/

/* Call these ones when an origin is available ... */
VG_REGPARM(1)
void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
}

VG_REGPARM(1)
void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
}

VG_REGPARM(2)
void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
}

/* ... and these when an origin isn't available. */

VG_REGPARM(0)
void MC_(helperc_value_check0_fail_no_o) ( void ) {
   MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check1_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check4_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
}

VG_REGPARM(0)
void MC_(helperc_value_check8_fail_no_o) ( void ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
}

VG_REGPARM(1)
void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
}


/*------------------------------------------------------------*/
/*--- Metadata get/set functions, for client requests.     ---*/
/*------------------------------------------------------------*/

// Nb: this expands the V+A bits out into register-form V bits, even though
// they're in memory.  This is for backward compatibility, and because it's
// probably what the user wants.

/* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   error [no longer used], 3 == addressing error. */
/* Nb: We used to issue various definedness/addressability errors from here,
   but we took them out because they ranged from not-very-helpful to
   downright annoying, and they complicated the error data structures. */
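/* Client-side view (an illustrative sketch; the real request macros
   live in memcheck.h):

      char buf[8];
      char vb[8];
      VALGRIND_GET_VBITS(buf, vb, 8);  // vb[i] == 0xFF: byte i undefined
      vb[0] = 0x00;                    // force byte 0 to "defined"
      VALGRIND_SET_VBITS(buf, vb, 8);

   Each V-bits byte is in register form: one V bit per data bit,
   0 = defined, 1 = undefined. */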
static Int mc_get_or_set_vbits_for_client (
   Addr a,
   Addr vbits,
   SizeT szB,
   Bool setting, /* True <=> set vbits,  False <=> get vbits */
   Bool is_client_request /* True <=> real user request
                             False <=> internal call from gdbserver */
)
{
   SizeT i;
   Bool  ok;
   UChar vbits8;

   /* Check that arrays are addressable before doing any getting/setting.
      vbits to be checked only for real user request. */
   for (i = 0; i < szB; i++) {
      if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
          (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
         return 3;
      }
   }

   /* Do the copy */
   if (setting) {
      /* setting */
      for (i = 0; i < szB; i++) {
         ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
         tl_assert(ok);
      }
   } else {
      /* getting */
      for (i = 0; i < szB; i++) {
         ok = get_vbits8(a + i, &vbits8);
         tl_assert(ok);
         ((UChar*)vbits)[i] = vbits8;
      }
      if (is_client_request)
        // The bytes in vbits[] have now been set, so mark them as such.
        MC_(make_mem_defined)(vbits, szB);
   }

   return 1;
}


/*------------------------------------------------------------*/
/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
/*------------------------------------------------------------*/

/* For the memory leak detector, say whether an entire 64k chunk of
   address space is possibly in use, or not.  If in doubt return
   True.
*/
Bool MC_(is_within_valid_secondary) ( Addr a )
{
   SecMap* sm = maybe_get_secmap_for ( a );
   if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
       || MC_(in_ignored_range)(a)) {
      /* Definitely not in use. */
      return False;
   } else {
      return True;
   }
}


/* For the memory leak detector, say whether or not a given word
   address is to be regarded as valid. */
Bool MC_(is_valid_aligned_word) ( Addr a )
{
   tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   tl_assert(VG_IS_WORD_ALIGNED(a));
   if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
       && !MC_(in_ignored_range)(a)) {
      return True;
   } else {
      return False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation                                       ---*/
/*------------------------------------------------------------*/

static void init_shadow_memory ( void )
{
   Int     i;
   SecMap* sm;

   tl_assert(V_BIT_UNDEFINED   == 1);
   tl_assert(V_BIT_DEFINED     == 0);
   tl_assert(V_BITS8_UNDEFINED == 0xFF);
   tl_assert(V_BITS8_DEFINED   == 0);

   /* Build the 3 distinguished secondaries */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;

   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;

   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;

   /* Set up the primary map. */
   /* These entries gradually get overwritten as the used address
      space expands. */
   for (i = 0; i < N_PRIMARY_MAP; i++)
      primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];

   /* Auxiliary primary maps */
   init_auxmap_L1_L2();

   /* auxmap_size = auxmap_used = 0;
      no ... these are statically initialised */

   /* Secondary V bit table */
   secVBitTable = createSecVBitTable();
}


/*------------------------------------------------------------*/
/*--- Sanity check machinery (permanently engaged)         ---*/
/*------------------------------------------------------------*/

static Bool mc_cheap_sanity_check ( void )
{
   n_sanity_cheap++;
   PROF_EVENT(490, "cheap_sanity_check");
   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;
   /* nothing else useful we can rapidly check */
   return True;
}

static Bool mc_expensive_sanity_check ( void )
{
   Int     i;
   Word    n_secmaps_found;
   SecMap* sm;
   HChar*  errmsg;
   Bool    bad = False;

   if (0) VG_(printf)("expensive sanity check\n");
   if (0) return True;

   n_sanity_expensive++;
   PROF_EVENT(491, "expensive_sanity_check");

   /* Check for sane operating level */
   if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
      return False;

   /* Check that the 3 distinguished SMs are still as they should be. */

   /* Check noaccess DSM. */
   sm = &sm_distinguished[SM_DIST_NOACCESS];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_NOACCESS)
         bad = True;

   /* Check undefined DSM. */
   sm = &sm_distinguished[SM_DIST_UNDEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
         bad = True;

   /* Check defined DSM. */
   sm = &sm_distinguished[SM_DIST_DEFINED];
   for (i = 0; i < SM_CHUNKS; i++)
      if (sm->vabits8[i] != VA_BITS8_DEFINED)
         bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "distinguished_secondaries have changed\n");
      return False;
   }

   /* If we're not checking for undefined value errors, the secondary V bit
    * table should be empty. */
   if (MC_(clo_mc_level) == 1) {
      if (0 != VG_(OSetGen_Size)(secVBitTable))
         return False;
   }

   /* check the auxiliary maps, very thoroughly */
   n_secmaps_found = 0;
   errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   if (errmsg) {
      VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
      return False;
   }

   /* n_secmaps_found is now the number referred to by the auxiliary
      primary map.  Now add on the ones referred to by the main
      primary map. */
   for (i = 0; i < N_PRIMARY_MAP; i++) {
      if (primary_map[i] == NULL) {
         bad = True;
      } else {
         if (!is_distinguished_sm(primary_map[i]))
            n_secmaps_found++;
      }
   }

   /* check that the number of secmaps issued matches the number that
      are reachable (iow, no secmap leaks) */
   if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
      bad = True;

   if (bad) {
      VG_(printf)("memcheck expensive sanity: "
                  "apparent secmap leakage\n");
      return False;
   }

   /* there is only one pointer to each secmap (expensive) */

   return True;
}

/*------------------------------------------------------------*/
/*--- Command line args                                    ---*/
/*------------------------------------------------------------*/

Bool          MC_(clo_partial_loads_ok)       = False;
Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
Bool          MC_(clo_show_reachable)         = False;
Bool          MC_(clo_show_possibly_lost)     = True;
Bool          MC_(clo_workaround_gcc296_bugs) = False;
Int           MC_(clo_malloc_fill)            = -1;
Int           MC_(clo_free_fill)              = -1;
Int           MC_(clo_mc_level)               = 2;
const char*   MC_(clo_summary_file)           = NULL;


static Bool mc_process_cmd_line_options(Char* arg)
{
   Char* tmp_str;

   tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );

   /* Set MC_(clo_mc_level):
         1 = A bit tracking only
         2 = A and V bit tracking, but no V bit origins
         3 = A and V bit tracking, and V bit origins

      Do this by inspecting --undef-value-errors= and
      --track-origins=.  Reject the case --undef-value-errors=no
      --track-origins=yes as meaningless.
   */
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
      if (MC_(clo_mc_level) == 3) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 1;
         return True;
      }
   }
   if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
      if (MC_(clo_mc_level) == 1)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
      if (MC_(clo_mc_level) == 3)
         MC_(clo_mc_level) = 2;
      return True;
   }
   if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
      if (MC_(clo_mc_level) == 1) {
         goto bad_level;
      } else {
         MC_(clo_mc_level) = 3;
         return True;
      }
   }

   if      VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
   else if VG_BOOL_CLO(arg, "--show-possibly-lost",
                                            MC_(clo_show_possibly_lost))     {}
   else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
                                            MC_(clo_workaround_gcc296_bugs)) {}

   else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
                                               0, 10*1000*1000*1000LL) {}

   else if VG_BINT_CLO(arg, "--freelist-big-blocks",
                       MC_(clo_freelist_big_blocks),
                       0, 10*1000*1000*1000LL) {}

   else if VG_XACT_CLO(arg, "--leak-check=no",
                            MC_(clo_leak_check), LC_Off) {}
   else if VG_XACT_CLO(arg, "--leak-check=summary",
                            MC_(clo_leak_check), LC_Summary) {}
   else if VG_XACT_CLO(arg, "--leak-check=yes",
                            MC_(clo_leak_check), LC_Full) {}
   else if VG_XACT_CLO(arg, "--leak-check=full",
                            MC_(clo_leak_check), LC_Full) {}

   else if VG_XACT_CLO(arg, "--leak-resolution=low",
                            MC_(clo_leak_resolution), Vg_LowRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=med",
                            MC_(clo_leak_resolution), Vg_MedRes) {}
   else if VG_XACT_CLO(arg, "--leak-resolution=high",
                            MC_(clo_leak_resolution), Vg_HighRes) {}

   else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
      MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
   }
   else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
      Int  i;
      Bool ok  = parse_ignore_ranges(tmp_str);
      if (!ok)
        return False;
      tl_assert(ignoreRanges.used >= 0);
      tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
      for (i = 0; i < ignoreRanges.used; i++) {
         Addr s = ignoreRanges.start[i];
         Addr e = ignoreRanges.end[i];
         Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
         if (e <= s) {
            VG_(message)(Vg_DebugMsg,
               "ERROR: --ignore-ranges: end <= start in range:\n");
            VG_(message)(Vg_DebugMsg,
               "       0x%lx-0x%lx\n", s, e);
            return False;
         }
         if (e - s > limit) {
            VG_(message)(Vg_DebugMsg,
               "ERROR: --ignore-ranges: suspiciously large range:\n");
            VG_(message)(Vg_DebugMsg,
               "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
            return False;
         }
      }
   }

   else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;


  bad_level:
   VG_(fmsg_bad_option)(arg,
      "--track-origins=yes has no effect when --undef-value-errors=no.\n");
}
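
/* Examples (illustrative) of how the options above combine to set
   MC_(clo_mc_level):

      (no relevant flags)                          => level 2
      --undef-value-errors=no                      => level 1
      --track-origins=yes                          => level 3
      --undef-value-errors=no --track-origins=yes  => rejected (bad_level)
*/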

static void mc_print_usage(void)
{
   VG_(printf)(
"    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
"    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
"    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
"    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
"                                     [yes]\n"
"    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
"    --track-origins=no|yes           show origins of undefined values? [no]\n"
"    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
"    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
"    --freelist-big-blocks=<number>   releases first blocks with size >= [1000000]\n"
"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
"    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
"    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
"    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   );
}

static void mc_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}


/*------------------------------------------------------------*/
/*--- Client blocks                                        ---*/
/*------------------------------------------------------------*/

/* Client block management:

   This is managed as an expanding array of client block descriptors.
   Indices of live descriptors are issued to the client, so it can ask
   to free them later.  Therefore we cannot slide live entries down
   over dead ones.  Instead we must use free/inuse flags and scan for
   an empty slot at allocation time.  This in turn means allocation is
   relatively expensive, so we hope this does not happen too often.

   An unused block has start == size == 0
*/
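
/* Illustrative client usage (a sketch; the real macros are in
   valgrind.h / memcheck.h):

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...                        // 'id' indexes a descriptor below
      VALGRIND_DISCARD(id);      // marks the slot free for reuse
*/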

/* type CGenBlock is defined in mc_include.h */

/* This subsystem is self-initialising. */
static UWord      cgb_size = 0;
static UWord      cgb_used = 0;
static CGenBlock* cgbs     = NULL;

/* Stats for this subsystem. */
static ULong cgb_used_MAX = 0;   /* Max in use. */
static ULong cgb_allocs   = 0;   /* Number of allocs. */
static ULong cgb_discards = 0;   /* Number of discards. */
static ULong cgb_search   = 0;   /* Number of searches. */


/* Get access to the client block array. */
void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
                                 /*OUT*/UWord* nBlocks )
{
   *blocks  = cgbs;
   *nBlocks = cgb_used;
}


static
Int alloc_client_block ( void )
{
   UWord      i, sz_new;
   CGenBlock* cgbs_new;

   cgb_allocs++;

   for (i = 0; i < cgb_used; i++) {
      cgb_search++;
      if (cgbs[i].start == 0 && cgbs[i].size == 0)
         return i;
   }

   /* Not found.  Try to allocate one at the end. */
   if (cgb_used < cgb_size) {
      cgb_used++;
      return cgb_used-1;
   }

   /* Ok, we have to allocate a new one. */
   tl_assert(cgb_used == cgb_size);
   sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);

   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   for (i = 0; i < cgb_used; i++)
      cgbs_new[i] = cgbs[i];

   if (cgbs != NULL)
      VG_(free)( cgbs );
   cgbs = cgbs_new;

   cgb_size = sz_new;
   cgb_used++;
   if (cgb_used > cgb_used_MAX)
      cgb_used_MAX = cgb_used;
   return cgb_used-1;
}

static void show_client_block_stats ( void )
{
   VG_(message)(Vg_DebugMsg,
      "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
      cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   );
}

static void print_monitor_help ( void )
{
   VG_(gdb_printf)
      (
"\n"
"memcheck monitor commands:\n"
"  get_vbits <addr> [<len>]\n"
"        returns validity bits for <len> (or 1) bytes at <addr>\n"
"            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
"        Example: get_vbits 0x8049c78 10\n"
"  make_memory [noaccess|undefined\n"
"                     |defined|Definedifaddressable] <addr> [<len>]\n"
"        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
"  check_memory [addressable|defined] <addr> [<len>]\n"
"        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
"            and outputs a description of <addr>\n"
"  leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
"                [increased*|changed|any]\n"
"            * = defaults\n"
"        Examples: leak_check\n"
"                  leak_check summary any\n"
"\n");
}
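
/* Example session (illustrative), driving the commands above from gdb
   via vgdb:

      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor make_memory undefined 0x8049c78 10
      (gdb) monitor check_memory defined 0x8049c78 10
      (gdb) monitor leak_check summary any
*/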

/* return True if request recognised, False otherwise */
static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
{
   Char* wcmd;
   Char s[VG_(strlen)(req) + 1]; /* copy for strtok_r; +1 for the NUL */
   Char *ssaveptr;

   VG_(strcpy) (s, req);

   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   /* NB: if possible, avoid introducing a new command below which
      starts with the same first letter(s) as an already existing
      command. This ensures a shorter abbreviation for the user. */
   switch (VG_(keyword_id)
           ("help get_vbits leak_check make_memory check_memory",
            wcmd, kwd_report_duplicated_matches)) {
   case -2: /* multiple matches */
      return True;
   case -1: /* not found */
      return False;
   case  0: /* help */
      print_monitor_help();
      return True;
   case  1: { /* get_vbits */
      Addr address;
      SizeT szB = 1;
      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
      if (szB != 0) {
         UChar vbits;
         Int i;
         Int unaddressable = 0;
         for (i = 0; i < szB; i++) {
            Int res = mc_get_or_set_vbits_for_client
               (address+i, (Addr) &vbits, 1,
                False, /* get them */
                False  /* is client request */ );
            if ((i % 32) == 0 && i != 0)
               VG_(gdb_printf) ("\n");
            else if ((i % 4) == 0 && i != 0)
               VG_(gdb_printf) (" ");
            if (res == 1) {
               VG_(gdb_printf) ("%02x", vbits);
            } else {
               tl_assert(3 == res);
               unaddressable++;
               VG_(gdb_printf) ("__");
            }
         }
         if ((i % 80) != 0)
            VG_(gdb_printf) ("\n");
         if (unaddressable) {
            VG_(gdb_printf)
               ("Address %p len %ld has %d bytes unaddressable\n",
                (void *)address, szB, unaddressable);
         }
      }
      return True;
   }
   case  2: { /* leak_check */
      Int err = 0;
      LeakCheckParams lcp;
      Char* kw;

      lcp.mode               = LC_Full;
      lcp.show_reachable     = False;
      lcp.show_possibly_lost = True;
      lcp.deltamode          = LCD_Increased;
      lcp.requested_by_monitor_command = True;

      for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
           kw != NULL;
           kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
         switch (VG_(keyword_id)
                 ("full summary "
                  "reachable possibleleak definiteleak "
                  "increased changed any",
                  kw, kwd_report_all)) {
         case -2: err++; break;
         case -1: err++; break;
         case  0: /* full */
            lcp.mode = LC_Full; break;
         case  1: /* summary */
            lcp.mode = LC_Summary; break;
         case  2: /* reachable */
            lcp.show_reachable = True;
            lcp.show_possibly_lost = True; break;
         case  3: /* possibleleak */
            lcp.show_reachable = False;
            lcp.show_possibly_lost = True; break;
         case  4: /* definiteleak */
            lcp.show_reachable = False;
            lcp.show_possibly_lost = False; break;
         case  5: /* increased */
            lcp.deltamode = LCD_Increased; break;
         case  6: /* changed */
            lcp.deltamode = LCD_Changed; break;
         case  7: /* any */
            lcp.deltamode = LCD_Any; break;
         default:
            tl_assert (0);
         }
      }
      if (!err)
         MC_(detect_memory_leaks)(tid, lcp);
      return True;
   }

   case  3: { /* make_memory */
      Addr address;
      SizeT szB = 1;
      int kwdid = VG_(keyword_id)
         ("noaccess undefined defined Definedifaddressable",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
      if (address == (Addr) 0 && szB == 0) return True;
      switch (kwdid) {
      case -2: break;
      case -1: break;
      case  0: MC_(make_mem_noaccess) (address, szB); break;
      case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
                                                    MC_OKIND_USER ); break;
      case  2: MC_(make_mem_defined) ( address, szB ); break;
      case  3: make_mem_defined_if_addressable ( address, szB ); break;
      default: tl_assert(0);
      }
      return True;
   }

   case  4: { /* check_memory */
      Addr address;
      SizeT szB = 1;
      Addr bad_addr;
      UInt okind;
      Char* src;
      UInt otag;
      UInt ecu;
      ExeContext* origin_ec;
      MC_ReadResult res;

      int kwdid = VG_(keyword_id)
         ("addressable defined",
          VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
      VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
      if (address == (Addr) 0 && szB == 0) return True;
      switch (kwdid) {
      case -2: break;
      case -1: break;
      case  0:
         if (is_mem_addressable ( address, szB, &bad_addr ))
            VG_(gdb_printf) ("Address %p len %ld addressable\n",
                             (void *)address, szB);
         else
            VG_(gdb_printf)
               ("Address %p len %ld not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         MC_(pp_describe_addr) (address);
         break;
      case  1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
         if (MC_AddrErr == res)
            VG_(gdb_printf)
               ("Address %p len %ld not addressable:\nbad address %p\n",
                (void *)address, szB, (void *) bad_addr);
         else if (MC_ValueErr == res) {
            okind = otag & 3;
            switch (okind) {
            case MC_OKIND_STACK:
               src = " was created by a stack allocation"; break;
            case MC_OKIND_HEAP:
               src = " was created by a heap allocation"; break;
            case MC_OKIND_USER:
               src = " was created by a client request"; break;
            case MC_OKIND_UNKNOWN:
               src = ""; break;
            default: tl_assert(0);
            }
            VG_(gdb_printf)
               ("Address %p len %ld not defined:\n"
                "Uninitialised value at %p%s\n",
                (void *)address, szB, (void *) bad_addr, src);
            ecu = otag & ~3;
            if (VG_(is_plausible_ECU)(ecu)) {
               origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
               VG_(pp_ExeContext)( origin_ec );
            }
         }
         else
            VG_(gdb_printf) ("Address %p len %ld defined\n",
                             (void *)address, szB);
         MC_(pp_describe_addr) (address);
         break;
      default: tl_assert(0);
      }
      return True;
   }

   default:
      tl_assert(0);
      return False;
   }
}

/*------------------------------------------------------------*/
/*--- Client requests                                      ---*/
/*------------------------------------------------------------*/

mc_handle_client_request(ThreadId tid,UWord * arg,UWord * ret)5235 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5236 {
5237    Int   i;
5238    Bool  ok;
5239    Addr  bad_addr;
5240 
5241    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5242        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5243        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5244        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5245        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5246        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5247        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5248        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5249        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5250        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5251        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5252        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5253        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0])
5254       return False;
5255 
5256    switch (arg[0]) {
5257       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5258          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5259          if (!ok)
5260             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5261          *ret = ok ? (UWord)NULL : bad_addr;
5262          break;
5263 
5264       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5265          Bool errorV    = False;
5266          Addr bad_addrV = 0;
5267          UInt otagV     = 0;
5268          Bool errorA    = False;
5269          Addr bad_addrA = 0;
5270          is_mem_defined_comprehensive(
5271             arg[1], arg[2],
5272             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5273          );
5274          if (errorV) {
5275             MC_(record_user_error) ( tid, bad_addrV,
5276                                      /*isAddrErr*/False, otagV );
5277          }
5278          if (errorA) {
5279             MC_(record_user_error) ( tid, bad_addrA,
5280                                      /*isAddrErr*/True, 0 );
5281          }
5282          /* Return the lower of the two erring addresses, if any. */
5283          *ret = 0;
5284          if (errorV && !errorA) {
5285             *ret = bad_addrV;
5286          }
5287          if (!errorV && errorA) {
5288             *ret = bad_addrA;
5289          }
5290          if (errorV && errorA) {
5291             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5292          }
5293          break;
5294       }
5295 
5296       case VG_USERREQ__DO_LEAK_CHECK: {
5297          LeakCheckParams lcp;
5298 
5299          if (arg[1] == 0)
5300             lcp.mode = LC_Full;
5301          else if (arg[1] == 1)
5302             lcp.mode = LC_Summary;
5303          else {
5304             VG_(message)(Vg_UserMsg,
5305                          "Warning: unknown memcheck leak search mode\n");
5306             lcp.mode = LC_Full;
5307          }
5308 
5309          lcp.show_reachable = MC_(clo_show_reachable);
5310          lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5311 
5312          if (arg[2] == 0)
5313             lcp.deltamode = LCD_Any;
5314          else if (arg[2] == 1)
5315             lcp.deltamode = LCD_Increased;
5316          else if (arg[2] == 2)
5317             lcp.deltamode = LCD_Changed;
5318          else {
5319             VG_(message)
5320                (Vg_UserMsg,
5321                 "Warning: unknown memcheck leak search deltamode\n");
5322             lcp.deltamode = LCD_Any;
5323          }
5324          lcp.requested_by_monitor_command = False;
5325 
5326          MC_(detect_memory_leaks)(tid, lcp);
5327          *ret = 0; /* return value is meaningless */
5328          break;
5329       }
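      /* Client side, this request is normally issued via the leak
         check macros in memcheck.h; the exact set varies by version,
         but roughly (a sketch):

            VALGRIND_DO_LEAK_CHECK;        // arg[1]==0, arg[2]==0
            VALGRIND_DO_QUICK_LEAK_CHECK;  // arg[1]==1: summary only
            VALGRIND_DO_ADDED_LEAK_CHECK;  // arg[2]==1: new leaks only
       */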
5330 
5331       case VG_USERREQ__MAKE_MEM_NOACCESS:
5332          MC_(make_mem_noaccess) ( arg[1], arg[2] );
5333          *ret = -1;
5334          break;
5335 
5336       case VG_USERREQ__MAKE_MEM_UNDEFINED:
5337          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5338                                               MC_OKIND_USER );
5339          *ret = -1;
5340          break;
5341 
5342       case VG_USERREQ__MAKE_MEM_DEFINED:
5343          MC_(make_mem_defined) ( arg[1], arg[2] );
5344          *ret = -1;
5345          break;
5346 
5347       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5348          make_mem_defined_if_addressable ( arg[1], arg[2] );
5349          *ret = -1;
5350          break;
5351 
5352       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5353          if (arg[1] != 0 && arg[2] != 0) {
5354             i = alloc_client_block();
5355             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5356             cgbs[i].start = arg[1];
5357             cgbs[i].size  = arg[2];
5358             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5359             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5360             *ret = i;
5361          } else
5362             *ret = -1;
5363          break;
5364 
5365       case VG_USERREQ__DISCARD: /* discard */
5366          if (cgbs == NULL
5367              || arg[2] >= cgb_used
5368              || (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5369             *ret = 1;
5370          } else {
5371             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5372             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5373             VG_(free)(cgbs[arg[2]].desc);
5374             cgb_discards++;
5375             *ret = 0;
5376          }
5377          break;
5378 
5379       case VG_USERREQ__GET_VBITS:
5380          *ret = mc_get_or_set_vbits_for_client
5381                    ( arg[1], arg[2], arg[3],
5382                      False /* get them */,
5383                      True /* is client request */ );
5384          break;
5385 
5386       case VG_USERREQ__SET_VBITS:
5387          *ret = mc_get_or_set_vbits_for_client
5388                    ( arg[1], arg[2], arg[3],
5389                      True /* set them */,
5390                      True /* is client request */ );
5391          break;
5392 
5393       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5394          UWord** argp = (UWord**)arg;
5395          // MC_(bytes_leaked) et al were set by the last leak check (or zero
5396          // if no prior leak checks performed).
5397          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5398          *argp[2] = MC_(bytes_dubious);
5399          *argp[3] = MC_(bytes_reachable);
5400          *argp[4] = MC_(bytes_suppressed);
5401          // there is no argp[5]
5402          //*argp[5] = MC_(bytes_indirect);
5403          // XXX need to make *argp[1-4] defined;  currently done in the
5404          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5405          *ret = 0;
5406          return True;
5407       }
5408       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5409          UWord** argp = (UWord**)arg;
5410          // MC_(blocks_leaked) et al were set by the last leak check (or zero
5411          // if no prior leak checks performed).
5412          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5413          *argp[2] = MC_(blocks_dubious);
5414          *argp[3] = MC_(blocks_reachable);
5415          *argp[4] = MC_(blocks_suppressed);
5416          // there is no argp[5]
5417          //*argp[5] = MC_(blocks_indirect);
5418          // XXX need to make *argp[1-4] defined;  currently done in the
5419          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5420          *ret = 0;
5421          return True;
5422       }
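      /* A sketch of the client side of the two counting requests
         above; the macros zero the four output variables first, which
         also makes them defined (cf. the XXX comments):

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
            VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable,
                                       suppressed);
       */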
5423       case VG_USERREQ__MALLOCLIKE_BLOCK: {
5424          Addr p         = (Addr)arg[1];
5425          SizeT sizeB    =       arg[2];
5426          //UInt rzB       =       arg[3];    XXX: unused!
5427          Bool is_zeroed = (Bool)arg[4];
5428 
5429          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5430                           MC_AllocCustom, MC_(malloc_list) );
5431          return True;
5432       }
5433       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
5434          Addr p         = (Addr)arg[1];
5435          SizeT oldSizeB =       arg[2];
5436          SizeT newSizeB =       arg[3];
5437          UInt rzB       =       arg[4];
5438 
5439          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
5440          return True;
5441       }
5442       case VG_USERREQ__FREELIKE_BLOCK: {
5443          Addr p         = (Addr)arg[1];
5444          UInt rzB       =       arg[2];
5445 
5446          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5447          return True;
5448       }
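      /* MALLOCLIKE/RESIZEINPLACE/FREELIKE let a custom allocator get
         the same tracking as malloc/free.  A sketch, with a
         hypothetical pool allocator and no red zones:

            void* p = my_pool_alloc(sz);
            VALGRIND_MALLOCLIKE_BLOCK(p, sz, /*rzB*/0, /*is_zeroed*/0);
            ...use p...
            VALGRIND_FREELIKE_BLOCK(p, /*rzB*/0);
            my_pool_release(p);
       */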
5449 
5450       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5451          Char* s   = (Char*)arg[1];
5452          Addr  dst = (Addr) arg[2];
5453          Addr  src = (Addr) arg[3];
5454          SizeT len = (SizeT)arg[4];
5455          MC_(record_overlap_error)(tid, s, src, dst, len);
5456          return True;
5457       }
5458 
5459       case VG_USERREQ__CREATE_MEMPOOL: {
5460          Addr pool      = (Addr)arg[1];
5461          UInt rzB       =       arg[2];
5462          Bool is_zeroed = (Bool)arg[3];
5463 
5464          MC_(create_mempool) ( pool, rzB, is_zeroed );
5465          return True;
5466       }
5467 
5468       case VG_USERREQ__DESTROY_MEMPOOL: {
5469          Addr pool      = (Addr)arg[1];
5470 
5471          MC_(destroy_mempool) ( pool );
5472          return True;
5473       }
5474 
5475       case VG_USERREQ__MEMPOOL_ALLOC: {
5476          Addr pool      = (Addr)arg[1];
5477          Addr addr      = (Addr)arg[2];
5478          UInt size      =       arg[3];
5479 
5480          MC_(mempool_alloc) ( tid, pool, addr, size );
5481          return True;
5482       }
5483 
5484       case VG_USERREQ__MEMPOOL_FREE: {
5485          Addr pool      = (Addr)arg[1];
5486          Addr addr      = (Addr)arg[2];
5487 
5488          MC_(mempool_free) ( pool, addr );
5489          return True;
5490       }
5491 
5492       case VG_USERREQ__MEMPOOL_TRIM: {
5493          Addr pool      = (Addr)arg[1];
5494          Addr addr      = (Addr)arg[2];
5495          UInt size      =       arg[3];
5496 
5497          MC_(mempool_trim) ( pool, addr, size );
5498          return True;
5499       }
5500 
5501       case VG_USERREQ__MOVE_MEMPOOL: {
5502          Addr poolA     = (Addr)arg[1];
5503          Addr poolB     = (Addr)arg[2];
5504 
5505          MC_(move_mempool) ( poolA, poolB );
5506          return True;
5507       }
5508 
5509       case VG_USERREQ__MEMPOOL_CHANGE: {
5510          Addr pool      = (Addr)arg[1];
5511          Addr addrA     = (Addr)arg[2];
5512          Addr addrB     = (Addr)arg[3];
5513          UInt size      =       arg[4];
5514 
5515          MC_(mempool_change) ( pool, addrA, addrB, size );
5516          return True;
5517       }
5518 
5519       case VG_USERREQ__MEMPOOL_EXISTS: {
5520          Addr pool      = (Addr)arg[1];
5521 
5522          *ret = (UWord) MC_(mempool_exists) ( pool );
5523          return True;
5524       }
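      /* The mempool family, client side, in sketch form ("pool" is
         any address that uniquely identifies the pool, typically the
         pool structure itself; the macros live in valgrind.h):

            VALGRIND_CREATE_MEMPOOL(pool, /*rzB*/0, /*is_zeroed*/0);
            VALGRIND_MEMPOOL_ALLOC(pool, obj, size);
            VALGRIND_MEMPOOL_FREE(pool, obj);
            VALGRIND_DESTROY_MEMPOOL(pool);
       */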
5525 
5526       case VG_USERREQ__GDB_MONITOR_COMMAND: {
5527          Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
5528          if (handled)
5529             *ret = 1;
5530          else
5531             *ret = 0;
5532          return handled;
5533       }
5534 
5535       default:
5536          VG_(message)(
5537             Vg_UserMsg,
5538             "Warning: unknown memcheck client request code %llx\n",
5539             (ULong)arg[0]
5540          );
5541          return False;
5542    }
5543    return True;
5544 }
5545 
5546 
5547 /*------------------------------------------------------------*/
5548 /*--- Crude profiling machinery.                           ---*/
5549 /*------------------------------------------------------------*/
5550 
5551 // We track a number of interesting events (using PROF_EVENT)
5552 // if MC_PROFILE_MEMORY is defined.
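// A sketch of how a counter gets bumped at an instrumentation site
// (PROF_EVENT is defined in mc_include.h; the exact signature may
// differ between versions):
//
//    PROF_EVENT(30, "mc_make_mem_undefined");  // ++MC_(event_ctr)[30]
//
// done_prof_mem() below then prints every counter that is nonzero.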
5553 
5554 #ifdef MC_PROFILE_MEMORY
5555 
5556 UInt   MC_(event_ctr)[N_PROF_EVENTS];
5557 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5558 
5559 static void init_prof_mem ( void )
5560 {
5561    Int i;
5562    for (i = 0; i < N_PROF_EVENTS; i++) {
5563       MC_(event_ctr)[i] = 0;
5564       MC_(event_ctr_name)[i] = NULL;
5565    }
5566 }
5567 
5568 static void done_prof_mem ( void )
5569 {
5570    Int  i;
5571    Bool spaced = False;
5572    for (i = 0; i < N_PROF_EVENTS; i++) {
5573       if (!spaced && (i % 10) == 0) {
5574          VG_(printf)("\n");
5575          spaced = True;
5576       }
5577       if (MC_(event_ctr)[i] > 0) {
5578          spaced = False;
5579          VG_(printf)( "prof mem event %3d: %9d   %s\n",
5580                       i, MC_(event_ctr)[i],
5581                       MC_(event_ctr_name)[i]
5582                          ? MC_(event_ctr_name)[i] : "unnamed");
5583       }
5584    }
5585 }
5586 
5587 #else
5588 
5589 static void init_prof_mem ( void ) { }
5590 static void done_prof_mem ( void ) { }
5591 
5592 #endif
5593 
5594 
5595 /*------------------------------------------------------------*/
5596 /*--- Origin tracking stuff                                ---*/
5597 /*------------------------------------------------------------*/
5598 
5599 /*--------------------------------------------*/
5600 /*--- Origin tracking: load handlers       ---*/
5601 /*--------------------------------------------*/
5602 
5603 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5604    return or1 > or2 ? or1 : or2;
5605 }
5606 
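/* How the origin ("B bit") cache is read: per 32-bit word of address
   space a line stores one 32-bit origin tag (w32[lineoff]) plus a
   4-bit descriptor (descr[lineoff]) saying which of that word's four
   bytes the tag is valid for.  A 1-byte load at address a therefore
   tests bit (a & 3) of the descriptor: clear means "no interesting
   origin", so 0 is returned; set means the word's tag is returned.
   Wider or misaligned accesses decompose into narrower ones and
   combine the partial results with merge_origins, which simply keeps
   the numerically larger tag -- an arbitrary but cheap and
   deterministic choice. */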
5607 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5608    OCacheLine* line;
5609    UChar descr;
5610    UWord lineoff = oc_line_offset(a);
5611    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5612 
5613    if (OC_ENABLE_ASSERTIONS) {
5614       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5615    }
5616 
5617    line = find_OCacheLine( a );
5618 
5619    descr = line->descr[lineoff];
5620    if (OC_ENABLE_ASSERTIONS) {
5621       tl_assert(descr < 0x10);
5622    }
5623 
5624    if (LIKELY(0 == (descr & (1 << byteoff))))  {
5625       return 0;
5626    } else {
5627       return line->w32[lineoff];
5628    }
5629 }
5630 
5631 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5632    OCacheLine* line;
5633    UChar descr;
5634    UWord lineoff, byteoff;
5635 
5636    if (UNLIKELY(a & 1)) {
5637       /* Handle misaligned case, slowly. */
5638       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
5639       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
5640       return merge_origins(oLo, oHi);
5641    }
5642 
5643    lineoff = oc_line_offset(a);
5644    byteoff = a & 3; /* 0 or 2 */
5645 
5646    if (OC_ENABLE_ASSERTIONS) {
5647       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5648    }
5649    line = find_OCacheLine( a );
5650 
5651    descr = line->descr[lineoff];
5652    if (OC_ENABLE_ASSERTIONS) {
5653       tl_assert(descr < 0x10);
5654    }
5655 
5656    if (LIKELY(0 == (descr & (3 << byteoff)))) {
5657       return 0;
5658    } else {
5659       return line->w32[lineoff];
5660    }
5661 }
5662 
5663 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5664    OCacheLine* line;
5665    UChar descr;
5666    UWord lineoff;
5667 
5668    if (UNLIKELY(a & 3)) {
5669       /* Handle misaligned case, slowly. */
5670       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
5671       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
5672       return merge_origins(oLo, oHi);
5673    }
5674 
5675    lineoff = oc_line_offset(a);
5676    if (OC_ENABLE_ASSERTIONS) {
5677       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5678    }
5679 
5680    line = find_OCacheLine( a );
5681 
5682    descr = line->descr[lineoff];
5683    if (OC_ENABLE_ASSERTIONS) {
5684       tl_assert(descr < 0x10);
5685    }
5686 
5687    if (LIKELY(0 == descr)) {
5688       return 0;
5689    } else {
5690       return line->w32[lineoff];
5691    }
5692 }
5693 
5694 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5695    OCacheLine* line;
5696    UChar descrLo, descrHi, descr;
5697    UWord lineoff;
5698 
5699    if (UNLIKELY(a & 7)) {
5700       /* Handle misaligned case, slowly. */
5701       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
5702       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
5703       return merge_origins(oLo, oHi);
5704    }
5705 
5706    lineoff = oc_line_offset(a);
5707    if (OC_ENABLE_ASSERTIONS) {
5708       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5709    }
5710 
5711    line = find_OCacheLine( a );
5712 
5713    descrLo = line->descr[lineoff + 0];
5714    descrHi = line->descr[lineoff + 1];
5715    descr   = descrLo | descrHi;
5716    if (OC_ENABLE_ASSERTIONS) {
5717       tl_assert(descr < 0x10);
5718    }
5719 
5720    if (LIKELY(0 == descr)) {
5721       return 0; /* both 32-bit chunks are defined */
5722    } else {
5723       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5724       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5725       return merge_origins(oLo, oHi);
5726    }
5727 }
5728 
5729 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5730    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
5731    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
5732    UInt oBoth = merge_origins(oLo, oHi);
5733    return (UWord)oBoth;
5734 }
5735 
5736 
5737 /*--------------------------------------------*/
5738 /*--- Origin tracking: store handlers      ---*/
5739 /*--------------------------------------------*/
5740 
5741 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5742    OCacheLine* line;
5743    UWord lineoff = oc_line_offset(a);
5744    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5745 
5746    if (OC_ENABLE_ASSERTIONS) {
5747       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5748    }
5749 
5750    line = find_OCacheLine( a );
5751 
5752    if (d32 == 0) {
5753       line->descr[lineoff] &= ~(1 << byteoff);
5754    } else {
5755       line->descr[lineoff] |= (1 << byteoff);
5756       line->w32[lineoff] = d32;
5757    }
5758 }
5759 
5760 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5761    OCacheLine* line;
5762    UWord lineoff, byteoff;
5763 
5764    if (UNLIKELY(a & 1)) {
5765       /* Handle misaligned case, slowly. */
5766       MC_(helperc_b_store1)( a + 0, d32 );
5767       MC_(helperc_b_store1)( a + 1, d32 );
5768       return;
5769    }
5770 
5771    lineoff = oc_line_offset(a);
5772    byteoff = a & 3; /* 0 or 2 */
5773 
5774    if (OC_ENABLE_ASSERTIONS) {
5775       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5776    }
5777 
5778    line = find_OCacheLine( a );
5779 
5780    if (d32 == 0) {
5781       line->descr[lineoff] &= ~(3 << byteoff);
5782    } else {
5783       line->descr[lineoff] |= (3 << byteoff);
5784       line->w32[lineoff] = d32;
5785    }
5786 }
5787 
5788 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5789    OCacheLine* line;
5790    UWord lineoff;
5791 
5792    if (UNLIKELY(a & 3)) {
5793       /* Handle misaligned case, slowly. */
5794       MC_(helperc_b_store2)( a + 0, d32 );
5795       MC_(helperc_b_store2)( a + 2, d32 );
5796       return;
5797    }
5798 
5799    lineoff = oc_line_offset(a);
5800    if (OC_ENABLE_ASSERTIONS) {
5801       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5802    }
5803 
5804    line = find_OCacheLine( a );
5805 
5806    if (d32 == 0) {
5807       line->descr[lineoff] = 0;
5808    } else {
5809       line->descr[lineoff] = 0xF;
5810       line->w32[lineoff] = d32;
5811    }
5812 }
5813 
5814 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5815    OCacheLine* line;
5816    UWord lineoff;
5817 
5818    if (UNLIKELY(a & 7)) {
5819       /* Handle misaligned case, slowly. */
5820       MC_(helperc_b_store4)( a + 0, d32 );
5821       MC_(helperc_b_store4)( a + 4, d32 );
5822       return;
5823    }
5824 
5825    lineoff = oc_line_offset(a);
5826    if (OC_ENABLE_ASSERTIONS) {
5827       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5828    }
5829 
5830    line = find_OCacheLine( a );
5831 
5832    if (d32 == 0) {
5833       line->descr[lineoff + 0] = 0;
5834       line->descr[lineoff + 1] = 0;
5835    } else {
5836       line->descr[lineoff + 0] = 0xF;
5837       line->descr[lineoff + 1] = 0xF;
5838       line->w32[lineoff + 0] = d32;
5839       line->w32[lineoff + 1] = d32;
5840    }
5841 }
5842 
5843 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5844    MC_(helperc_b_store8)( a + 0, d32 );
5845    MC_(helperc_b_store8)( a + 8, d32 );
5846 }
5847 
5848 
5849 /*--------------------------------------------*/
5850 /*--- Origin tracking: sarp handlers       ---*/
5851 /*--------------------------------------------*/
5852 
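/* Both handlers below (sarp = set-address-range-perms) use the same
   alignment-peeling pattern: emit a leading 1-byte and/or 2-byte
   store to bring 'a' up to 4-alignment, do the bulk of the range
   with aligned 4-byte stores, then mop up a trailing halfword/byte.
   The final tl_assert(len == 0) checks that every byte was
   accounted for. */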
5853 __attribute__((noinline))
5854 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
5855    if ((a & 1) && len >= 1) {
5856       MC_(helperc_b_store1)( a, otag );
5857       a++;
5858       len--;
5859    }
5860    if ((a & 2) && len >= 2) {
5861       MC_(helperc_b_store2)( a, otag );
5862       a += 2;
5863       len -= 2;
5864    }
5865    if (len >= 4)
5866       tl_assert(0 == (a & 3));
5867    while (len >= 4) {
5868       MC_(helperc_b_store4)( a, otag );
5869       a += 4;
5870       len -= 4;
5871    }
5872    if (len >= 2) {
5873       MC_(helperc_b_store2)( a, otag );
5874       a += 2;
5875       len -= 2;
5876    }
5877    if (len >= 1) {
5878       MC_(helperc_b_store1)( a, otag );
5879       //a++;
5880       len--;
5881    }
5882    tl_assert(len == 0);
5883 }
5884 
5885 __attribute__((noinline))
5886 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
5887    if ((a & 1) && len >= 1) {
5888       MC_(helperc_b_store1)( a, 0 );
5889       a++;
5890       len--;
5891    }
5892    if ((a & 2) && len >= 2) {
5893       MC_(helperc_b_store2)( a, 0 );
5894       a += 2;
5895       len -= 2;
5896    }
5897    if (len >= 4)
5898       tl_assert(0 == (a & 3));
5899    while (len >= 4) {
5900       MC_(helperc_b_store4)( a, 0 );
5901       a += 4;
5902       len -= 4;
5903    }
5904    if (len >= 2) {
5905       MC_(helperc_b_store2)( a, 0 );
5906       a += 2;
5907       len -= 2;
5908    }
5909    if (len >= 1) {
5910       MC_(helperc_b_store1)( a, 0 );
5911       //a++;
5912       len--;
5913    }
5914    tl_assert(len == 0);
5915 }
5916 
5917 
5918 /*------------------------------------------------------------*/
5919 /*--- Setup and finalisation                               ---*/
5920 /*------------------------------------------------------------*/
5921 
5922 static void mc_post_clo_init ( void )
5923 {
5924    // timurrrr: removed the check for VG_(clo_xml) here.
5925    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
5926       VG_(message)(Vg_UserMsg,
5927                    "Warning: --freelist-big-blocks value %lld has no effect\n"
5928                    "as it is >= the --freelist-vol value %lld\n",
5929                    MC_(clo_freelist_big_blocks),
5930                    MC_(clo_freelist_vol));
5931 
5932    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5933 
5934    if (MC_(clo_mc_level) == 3) {
5935       /* We're doing origin tracking. */
5936 #     ifdef PERF_FAST_STACK
5937       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
5938       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
5939       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
5940       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
5941       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
5942       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
5943       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
5944       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
5945       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
5946 #     endif
5947       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
5948    } else {
5949       /* Not doing origin tracking */
5950 #     ifdef PERF_FAST_STACK
5951       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
5952       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
5953       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
5954       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
5955       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
5956       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
5957       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
5958       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
5959       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
5960 #     endif
5961       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
5962    }
5963 
5964    /* This origin tracking cache is huge (~100M), so only initialise
5965       if we need it. */
5966    if (MC_(clo_mc_level) >= 3) {
5967       init_OCache();
5968       tl_assert(ocacheL1 != NULL);
5969       tl_assert(ocacheL2 != NULL);
5970    } else {
5971       tl_assert(ocacheL1 == NULL);
5972       tl_assert(ocacheL2 == NULL);
5973    }
5974 }
5975 
5976 static void print_SM_info(char* type, int n_SMs)
5977 {
5978    VG_(message)(Vg_DebugMsg,
5979       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
5980       type,
5981       n_SMs,
5982       n_SMs * sizeof(SecMap) / 1024UL,
5983       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
5984 }
5985 
5986 static void mc_fini ( Int exitcode )
5987 {
5988    MC_(print_malloc_stats)();
5989 
5990    if (MC_(clo_leak_check) != LC_Off) {
5991       LeakCheckParams lcp;
5992       lcp.mode = MC_(clo_leak_check);
5993       lcp.show_reachable = MC_(clo_show_reachable);
5994       lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5995       lcp.deltamode = LCD_Any;
5996       lcp.requested_by_monitor_command = False;
5997       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, lcp);
5998    } else {
5999       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6000          VG_(umsg)(
6001             "For a detailed leak analysis, rerun with: --leak-check=full\n"
6002             "\n"
6003          );
6004       }
6005    }
6006 
6007    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6008       VG_(message)(Vg_UserMsg,
6009                    "For counts of detected and suppressed errors, rerun with: -v\n");
6010    }
6011 
6012    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6013        && MC_(clo_mc_level) == 2) {
6014       VG_(message)(Vg_UserMsg,
6015                    "Use --track-origins=yes to see where "
6016                    "uninitialised values come from\n");
6017    }
6018 
6019    done_prof_mem();
6020 
6021    if (VG_(clo_stats)) {
6022       SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6023 
6024       VG_(message)(Vg_DebugMsg,
6025          " memcheck: sanity checks: %d cheap, %d expensive\n",
6026          n_sanity_cheap, n_sanity_expensive );
6027       VG_(message)(Vg_DebugMsg,
6028          " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6029          n_auxmap_L2_nodes,
6030          n_auxmap_L2_nodes * 64,
6031          n_auxmap_L2_nodes / 16 );
6032       VG_(message)(Vg_DebugMsg,
6033          " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6034          n_auxmap_L1_searches, n_auxmap_L1_cmps,
6035          (10ULL * n_auxmap_L1_cmps)
6036             / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6037       );
6038       VG_(message)(Vg_DebugMsg,
6039          " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6040          n_auxmap_L2_searches, n_auxmap_L2_nodes
6041       );
6042 
6043       print_SM_info("n_issued     ", n_issued_SMs);
6044       print_SM_info("n_deissued   ", n_deissued_SMs);
6045       print_SM_info("max_noaccess ", max_noaccess_SMs);
6046       print_SM_info("max_undefined", max_undefined_SMs);
6047       print_SM_info("max_defined  ", max_defined_SMs);
6048       print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
6049 
6050       // Three DSMs, plus the non-DSM ones
6051       max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6052       // The 3*sizeof(Word) bytes is the AVL node metadata size.
6053       // The 4*sizeof(Word) bytes is the malloc metadata size.
6054       // Hardwiring these sizes in sucks, but I don't see how else to do it.
6055       max_secVBit_szB = max_secVBit_nodes *
6056             (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
6057       max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
6058 
6059       VG_(message)(Vg_DebugMsg,
6060          " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
6061          max_secVBit_nodes, max_secVBit_szB / 1024,
6062                             max_secVBit_szB / (1024 * 1024));
6063       VG_(message)(Vg_DebugMsg,
6064          " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6065          sec_vbits_new_nodes + sec_vbits_updates,
6066          sec_vbits_new_nodes, sec_vbits_updates );
6067       VG_(message)(Vg_DebugMsg,
6068          " memcheck: max shadow mem size:   %ldk, %ldM\n",
6069          max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6070 
6071       if (MC_(clo_mc_level) >= 3) {
6072          VG_(message)(Vg_DebugMsg,
6073                       " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
6074                       stats_ocacheL1_find,
6075                       stats_ocacheL1_misses,
6076                       stats_ocacheL1_lossage );
6077          VG_(message)(Vg_DebugMsg,
6078                       " ocacheL1: %'12lu at 0   %'12lu at 1\n",
6079                       stats_ocacheL1_find - stats_ocacheL1_misses
6080                          - stats_ocacheL1_found_at_1
6081                          - stats_ocacheL1_found_at_N,
6082                       stats_ocacheL1_found_at_1 );
6083          VG_(message)(Vg_DebugMsg,
6084                       " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
6085                       stats_ocacheL1_found_at_N,
6086                       stats_ocacheL1_movefwds );
6087          VG_(message)(Vg_DebugMsg,
6088                       " ocacheL1: %'12lu sizeB  %'12u useful\n",
6089                       (UWord)sizeof(OCache),
6090                       4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6091          VG_(message)(Vg_DebugMsg,
6092                       " ocacheL2: %'12lu refs   %'12lu misses\n",
6093                       stats__ocacheL2_refs,
6094                       stats__ocacheL2_misses );
6095          VG_(message)(Vg_DebugMsg,
6096                       " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
6097                       stats__ocacheL2_n_nodes_max,
6098                       stats__ocacheL2_n_nodes );
6099          VG_(message)(Vg_DebugMsg,
6100                       " niacache: %'12lu refs   %'12lu misses\n",
6101                       stats__nia_cache_queries, stats__nia_cache_misses);
6102       } else {
6103          tl_assert(ocacheL1 == NULL);
6104          tl_assert(ocacheL2 == NULL);
6105       }
6106    }
6107 
6108    if (0) {
6109       VG_(message)(Vg_DebugMsg,
6110         "------ Valgrind's client block stats follow ---------------\n" );
6111       show_client_block_stats();
6112    }
6113 }
6114 
6115 /* Mark the given addr/len range unaddressable, for the watchpoint
6116    implementation.  The PointKind will be handled at access time. */
6117 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6118                                                   Addr addr, SizeT len)
6119 {
6120    /* GDBTD this is somewhat fishy.  Arguably we should save the previous
6121       accessibility and definedness in gdbserver, so that they can be
6122       restored properly.  Currently, we assume that the user only watches
6123       things which are properly addressable and defined. */
6124    if (insert)
6125       MC_(make_mem_noaccess) (addr, len);
6126    else
6127       MC_(make_mem_defined)  (addr, len);
6128    return True;
6129 }
6130 
6131 static void mc_pre_clo_init(void)
6132 {
6133    VG_(details_name)            ("Memcheck");
6134    VG_(details_version)         (NULL);
6135    VG_(details_description)     ("a memory error detector");
6136    VG_(details_copyright_author)(
6137       "Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.");
6138    VG_(details_bug_reports_to)  (VG_BUGS_TO);
6139    VG_(details_avg_translation_sizeB) ( 640 );
6140 
6141    VG_(basic_tool_funcs)          (mc_post_clo_init,
6142                                    MC_(instrument),
6143                                    mc_fini);
6144 
6145    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6146 
6147 
6148    VG_(needs_core_errors)         ();
6149    VG_(needs_tool_errors)         (MC_(eq_Error),
6150                                    MC_(before_pp_Error),
6151                                    MC_(pp_Error),
6152                                    True,/*show TIDs for errors*/
6153                                    MC_(update_Error_extra),
6154                                    MC_(is_recognised_suppression),
6155                                    MC_(read_extra_suppression_info),
6156                                    MC_(error_matches_suppression),
6157                                    MC_(get_error_name),
6158                                    MC_(get_extra_suppression_info));
6159    VG_(needs_libc_freeres)        ();
6160    VG_(needs_command_line_options)(mc_process_cmd_line_options,
6161                                    mc_print_usage,
6162                                    mc_print_debug_usage);
6163    VG_(needs_client_requests)     (mc_handle_client_request);
6164    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6165                                    mc_expensive_sanity_check);
6166    VG_(needs_malloc_replacement)  (MC_(malloc),
6167                                    MC_(__builtin_new),
6168                                    MC_(__builtin_vec_new),
6169                                    MC_(memalign),
6170                                    MC_(calloc),
6171                                    MC_(free),
6172                                    MC_(__builtin_delete),
6173                                    MC_(__builtin_vec_delete),
6174                                    MC_(realloc),
6175                                    MC_(malloc_usable_size),
6176                                    MC_MALLOC_REDZONE_SZB );
6177 
6178    VG_(needs_xml_output)          ();
6179 
6180    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6181    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
6182    // We assume that brk()/sbrk() does not initialise new memory.  Is this
6183    // accurate?  John Reiser says:
6184    //
6185    //   "0) sbrk() can *decrease* process address space.  No zero fill is done
6186    //   for a decrease, not even the fragment on the high end of the last page
6187    //   that is beyond the new highest address.  For maximum safety and
6188    //   portability, then the bytes in the last page that reside above [the
6189    //   new] sbrk(0) should be considered to be uninitialized, but in practice
6190    //   it is exceedingly likely that they will retain their previous
6191    //   contents.
6192    //
6193    //   1) If an increase is large enough to require new whole pages, then
6194    //   those new whole pages (like all new pages) are zero-filled by the
6195    //   operating system.  So if sbrk(0) already is page aligned, then
6196    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6197    //
6198    //   2) Any increase that lies within an existing allocated page is not
6199    //   changed.  So if (x = sbrk(0)) is not page aligned, then
6200    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6201    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6202    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6203    //   of them come along for the ride because the operating system deals
6204    //   only in whole pages.  Again, for maximum safety and portability, then
6205    //   anything that lives above [the new] sbrk(0) should be considered
6206    //   uninitialized, but in practice will retain previous contents [zero in
6207    //   this case.]"
6208    //
6209    // In short:
6210    //
6211    //   A key property of sbrk/brk is that new whole pages that are supplied
6212    //   by the operating system *do* get initialized to zero.
6213    //
6214    // As for the portability of all this:
6215    //
6216    //   sbrk and brk are not POSIX.  However, any system that is a derivative
6217    //   of *nix has sbrk and brk, because too much software (such as the
6218    //   Bourne shell) relies on the traditional memory map (.text,
6219    //   .data+.bss, stack) and the existence of sbrk/brk.
6220    //
6221    // So we should arguably observe all this.  However:
6222    // - The current inaccuracy has caused maybe one complaint in seven years(?)
6223    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6224    //   doubt most programmers know the above information.
6225    // So I'm not terribly unhappy with marking it as undefined. --njn.
6226    //
6227    // [More:  I think most of what John said only applies to sbrk().  It seems
6228    // that brk() always deals in whole pages.  And since this event deals
6229    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6230    // just mark all memory it allocates as defined.]
6231    //
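   // An illustrative (non-Valgrind) sketch of point 2 above, assuming
   // a 4096-byte page:
   //
   //    char* old_end = sbrk(0);   // suppose this is not page aligned
   //    sbrk(4096);                // extend the brk by one page
   //    // Bytes from old_end up to the end of its page keep whatever
   //    // contents they had; only the freshly mapped whole page
   //    // beyond it is guaranteed zero-filled by the kernel.
   //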
6232    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
6233 
6234    // Handling of mmap and mprotect isn't simple (well, it is simple,
6235    // but the justification isn't.)  See comments above, just prior to
6236    // mc_new_mem_mmap.
6237    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6238    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6239 
6240    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6241 
6242    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6243    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6244    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6245 
6246    /* Defer the specification of the new_mem_stack functions to the
6247       post_clo_init function, since we need to first parse the command
6248       line before deciding which set to use. */
6249 
6250 #  ifdef PERF_FAST_STACK
6251    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6252    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6253    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6254    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6255    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6256    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6257    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6258    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6259    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6260 #  endif
6261    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6262 
6263    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6264 
6265    VG_(track_pre_mem_read)        ( check_mem_is_defined );
6266    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6267    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6268    VG_(track_post_mem_write)      ( mc_post_mem_write );
6269 
6270    if (MC_(clo_mc_level) >= 2)
6271       VG_(track_pre_reg_read)     ( mc_pre_reg_read );
6272 
6273    VG_(track_post_reg_write)                  ( mc_post_reg_write );
6274    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6275 
6276    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6277 
6278    init_shadow_memory();
6279    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6280    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6281    init_prof_mem();
6282 
6283    tl_assert( mc_expensive_sanity_check() );
6284 
6285    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6286    tl_assert(sizeof(UWord) == sizeof(Addr));
6287    // Call me paranoid.  I don't care.
6288    tl_assert(sizeof(void*) == sizeof(Addr));
6289 
6290    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6291    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6292 
6293    /* This is small.  Always initialise it. */
6294    init_nia_to_ecu_cache();
6295 
6296    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6297       if we need to, since the command line args haven't been
6298       processed yet.  Hence defer it to mc_post_clo_init. */
6299    tl_assert(ocacheL1 == NULL);
6300    tl_assert(ocacheL2 == NULL);
6301 
6302    /* Check some important stuff.  See extensive comments above
6303       re UNALIGNED_OR_HIGH for background. */
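   /* Judging from the values asserted below, MASK(szB) behaves like
      (~MAX_PRIMARY_ADDRESS | (szB-1)): for an access of size szB at
      address a, (a & MASK(szB)) is nonzero iff a lies above the
      primary map or is not szB-aligned.  E.g. on a 64-bit host, a
      4-byte access at 0x1000 gives 0x1000 & 0xFFFFFFC000000003 == 0,
      so the fast case applies. */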
6304 #  if VG_WORDSIZE == 4
6305    tl_assert(sizeof(void*) == 4);
6306    tl_assert(sizeof(Addr)  == 4);
6307    tl_assert(sizeof(UWord) == 4);
6308    tl_assert(sizeof(Word)  == 4);
6309    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6310    tl_assert(MASK(1) == 0UL);
6311    tl_assert(MASK(2) == 1UL);
6312    tl_assert(MASK(4) == 3UL);
6313    tl_assert(MASK(8) == 7UL);
6314 #  else
6315    tl_assert(VG_WORDSIZE == 8);
6316    tl_assert(sizeof(void*) == 8);
6317    tl_assert(sizeof(Addr)  == 8);
6318    tl_assert(sizeof(UWord) == 8);
6319    tl_assert(sizeof(Word)  == 8);
6320    tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
6321    tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
6322    tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
6323    tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
6324    tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
6325 #  endif
6326 }
6327 
6328 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
6329 
6330 /*--------------------------------------------------------------------*/
6331 /*--- end                                                mc_main.c ---*/
6332 /*--------------------------------------------------------------------*/
6333